From 0f5c6657a4bf1210c70555f33b7f81fe51645b23 Mon Sep 17 00:00:00 2001 From: Dmitry Dygalo Date: Mon, 9 Mar 2026 10:47:43 +0100 Subject: [PATCH 01/14] chore: Split Registry Signed-off-by: Dmitry Dygalo --- CHANGELOG.md | 8 + MIGRATION.md | 69 + crates/jsonschema-cli/src/main.rs | 26 +- crates/jsonschema-py/src/lib.rs | 24 +- crates/jsonschema-py/src/registry.rs | 24 +- crates/jsonschema-py/tests-py/test_bundle.py | 47 + .../jsonschema-py/tests-py/test_registry.py | 44 +- crates/jsonschema-rb/spec/bundle_spec.rb | 17 + crates/jsonschema-rb/src/lib.rs | 35 +- crates/jsonschema-rb/src/options.rs | 11 +- crates/jsonschema-rb/src/registry.rs | 39 +- crates/jsonschema-referencing/Cargo.toml | 2 + .../jsonschema-referencing/benches/anchor.rs | 14 +- .../jsonschema-referencing/benches/pointer.rs | 13 +- .../benches/registry.rs | 18 +- .../jsonschema-referencing/src/anchors/mod.rs | 255 +- crates/jsonschema-referencing/src/cache.rs | 6 - crates/jsonschema-referencing/src/lib.rs | 8 +- crates/jsonschema-referencing/src/path.rs | 46 + crates/jsonschema-referencing/src/registry.rs | 2782 ++++++++++++----- crates/jsonschema-referencing/src/resolver.rs | 38 +- crates/jsonschema-referencing/src/resource.rs | 241 +- .../jsonschema-referencing/src/small_map.rs | 259 ++ .../src/specification/draft201909.rs | 112 +- .../src/specification/draft4.rs | 92 +- .../src/specification/draft6.rs | 79 +- .../src/specification/draft7.rs | 86 +- .../src/specification/mod.rs | 151 +- .../src/specification/subresources.rs | 76 +- crates/jsonschema-referencing/tests/suite.rs | 19 +- crates/jsonschema/src/bundler.rs | 38 +- crates/jsonschema/src/compiler.rs | 290 +- crates/jsonschema/src/error.rs | 11 +- crates/jsonschema/src/keywords/ref_.rs | 4 +- .../src/keywords/unevaluated_items.rs | 2 +- .../src/keywords/unevaluated_properties.rs | 6 +- crates/jsonschema/src/lib.rs | 406 +-- crates/jsonschema/src/options.rs | 271 +- crates/jsonschema/src/paths.rs | 64 +- crates/jsonschema/src/retriever.rs | 40 
+- crates/jsonschema/src/validator.rs | 5 +- crates/jsonschema/tests/bundle.rs | 324 +- fuzz/fuzz_targets/referencing.rs | 15 +- profiler/Cargo.toml | 4 + profiler/Justfile | 3 +- profiler/src/main.rs | 28 +- 46 files changed, 4125 insertions(+), 2027 deletions(-) create mode 100644 crates/jsonschema-referencing/src/path.rs create mode 100644 crates/jsonschema-referencing/src/small_map.rs diff --git a/CHANGELOG.md b/CHANGELOG.md index f03a3ccc..93f79491 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,14 @@ ## [Unreleased] +### Breaking Changes + +- Registry construction now uses an explicit prepare step, and `with_registry` now borrows the prepared registry. `ValidationOptions::with_resource` and `ValidationOptions::with_resources` were removed in favor of building a `Registry` first. See the [Migration Guide](MIGRATION.md) for the details. + +### Performance + +- Avoid registry clones and document clones during validator construction. This improves real-world schema compilation by roughly 10-20% in internal benchmarks. + ## [0.45.1] - 2026-04-06 ### Fixed diff --git a/MIGRATION.md b/MIGRATION.md index c22fab9a..1dc27005 100644 --- a/MIGRATION.md +++ b/MIGRATION.md @@ -1,5 +1,74 @@ # Migration Guide +## Upgrading from 0.45.x to 0.46.0 + +Registry construction is now explicit: add shared schemas first, then call +`prepare()` to build a reusable registry. Validators no longer take ownership of +that registry; pass it by reference with `with_registry(&registry)`. +`ValidationOptions::with_resource` and `ValidationOptions::with_resources` were +removed in favor of building a `Registry` first. For cases with multiple shared +schemas, `extend([...])` is the batch form of `add(...)`. 
+ +```rust +// Old (0.45.x) +use jsonschema::{Registry, Resource}; + +// Inline shared schema +let validator = jsonschema::options() + .with_resource( + "https://example.com/schema", + Resource::from_contents(shared_schema), + ) + .build(&schema)?; + +// Multiple shared schemas +let validator = jsonschema::options() + .with_resources([ + ( + "https://example.com/schema-1", + Resource::from_contents(schema_1), + ), + ( + "https://example.com/schema-2", + Resource::from_contents(schema_2), + ), + ].into_iter()) + .build(&schema)?; + +// Prebuilt registry +let registry = Registry::try_from_resources([ + ( + "https://example.com/schema", + Resource::from_contents(shared_schema), + ), +])?; +let validator = jsonschema::options() + .with_registry(registry) + .build(&schema)?; + +// New (0.46.0) +use jsonschema::Registry; + +// Shared registry + borrowed validator build +let registry = Registry::new() + .add("https://example.com/schema", shared_schema)? + .prepare()?; +let validator = jsonschema::options() + .with_registry(&registry) + .build(&schema)?; + +// Multiple shared schemas +let registry = Registry::new() + .extend([ + ("https://example.com/schema-1", schema_1), + ("https://example.com/schema-2", schema_2), + ])? 
+ .prepare()?; +let validator = jsonschema::options() + .with_registry(®istry) + .build(&schema)?; +``` + ## Upgrading from 0.38.x to 0.39.0 ### Custom keyword API simplified diff --git a/crates/jsonschema-cli/src/main.rs b/crates/jsonschema-cli/src/main.rs index 6628abdd..4959c63f 100644 --- a/crates/jsonschema-cli/src/main.rs +++ b/crates/jsonschema-cli/src/main.rs @@ -444,10 +444,10 @@ fn path_to_uri(path: &std::path::Path) -> String { result } -fn options_for_schema( +fn options_for_schema<'a>( schema_path: &Path, http_options: Option<&jsonschema::HttpOptions>, -) -> Result> { +) -> Result, Box> { let base_uri = path_to_uri(schema_path); let base_uri = referencing::uri::from_str(&base_uri)?; let mut options = jsonschema::options().with_base_uri(base_uri); @@ -695,16 +695,30 @@ fn run_bundle(args: BundleArgs) -> ExitCode { Err(error) => return fail_with_error(error), }; + let mut registry = if let Some(http_opts) = http_options.as_ref() { + let retriever = match jsonschema::HttpRetriever::new(http_opts) { + Ok(retriever) => retriever, + Err(error) => return fail_with_error(error), + }; + jsonschema::Registry::new().retriever(retriever) + } else { + jsonschema::Registry::new() + }; for (uri, path) in &resources { let resource_json = match read_json(path) { Ok(value) => value, Err(error) => return fail_with_error(error), }; - opts = opts.with_resource( - uri.as_str(), - referencing::Resource::from_contents(resource_json), - ); + registry = match registry.add(uri, resource_json) { + Ok(registry) => registry, + Err(error) => return fail_with_error(error), + }; } + let registry = match registry.prepare() { + Ok(registry) => registry, + Err(error) => return fail_with_error(error), + }; + opts = opts.with_registry(®istry); match opts.bundle(&schema_json) { Ok(bundled) => { diff --git a/crates/jsonschema-py/src/lib.rs b/crates/jsonschema-py/src/lib.rs index 9c946282..86e6d3c2 100644 --- a/crates/jsonschema-py/src/lib.rs +++ b/crates/jsonschema-py/src/lib.rs @@ -832,19 
+832,19 @@ impl jsonschema::Keyword for CustomKeyword { } } -fn make_options( +fn make_options<'a>( draft: Option, - formats: Option<&Bound<'_, PyDict>>, + formats: Option<&Bound<'a, PyDict>>, validate_formats: Option, ignore_unknown_formats: Option, - retriever: Option<&Bound<'_, PyAny>>, - registry: Option<®istry::Registry>, + retriever: Option<&Bound<'a, PyAny>>, + registry: Option<&'a registry::Registry>, base_uri: Option, - pattern_options: Option<&Bound<'_, PyAny>>, - email_options: Option<&Bound<'_, PyAny>>, - http_options: Option<&Bound<'_, PyAny>>, - keywords: Option<&Bound<'_, PyDict>>, -) -> PyResult { + pattern_options: Option<&Bound<'a, PyAny>>, + email_options: Option<&Bound<'a, PyAny>>, + http_options: Option<&Bound<'a, PyAny>>, + keywords: Option<&Bound<'a, PyDict>>, +) -> PyResult> { let mut options = jsonschema::options(); if let Some(raw_draft_version) = draft { options = options.with_draft(get_draft(raw_draft_version)?); @@ -890,7 +890,7 @@ fn make_options( options = options.with_retriever(Retriever { func }); } if let Some(registry) = registry { - options = options.with_registry(registry.inner.clone()); + options = options.with_registry(®istry.inner); } if let Some(base_uri) = base_uri { options = options.with_base_uri(base_uri); @@ -2021,7 +2021,7 @@ mod meta { let schema = crate::ser::to_value(schema)?; let result = if let Some(registry) = registry { jsonschema::meta::options() - .with_registry(registry.inner.clone()) + .with_registry(®istry.inner) .validate(&schema) } else { jsonschema::meta::validate(&schema) @@ -2070,7 +2070,7 @@ mod meta { let schema = crate::ser::to_value(schema)?; let result = if let Some(registry) = registry { jsonschema::meta::options() - .with_registry(registry.inner.clone()) + .with_registry(®istry.inner) .validate(&schema) } else { jsonschema::meta::validate(&schema) diff --git a/crates/jsonschema-py/src/registry.rs b/crates/jsonschema-py/src/registry.rs index d10fde99..da9458a2 100644 --- 
a/crates/jsonschema-py/src/registry.rs +++ b/crates/jsonschema-py/src/registry.rs @@ -1,4 +1,3 @@ -use jsonschema::Resource; use pyo3::{exceptions::PyValueError, prelude::*}; use crate::{get_draft, retriever::into_retriever, to_value, Retriever}; @@ -6,7 +5,7 @@ use crate::{get_draft, retriever::into_retriever, to_value, Retriever}; /// A registry of JSON Schema resources, each identified by their canonical URIs. #[pyclass] pub(crate) struct Registry { - pub(crate) inner: jsonschema::Registry, + pub(crate) inner: jsonschema::Registry<'static>, } #[pymethods] @@ -19,30 +18,29 @@ impl Registry { draft: Option, retriever: Option<&Bound<'_, PyAny>>, ) -> PyResult { - let mut options = jsonschema::Registry::options(); + let mut builder = jsonschema::Registry::new(); if let Some(draft) = draft { - options = options.draft(get_draft(draft)?); + builder = builder.draft(get_draft(draft)?); } if let Some(retriever) = retriever { let func = into_retriever(retriever)?; - options = options.retriever(Retriever { func }); + builder = builder.retriever(Retriever { func }); } - let pairs = resources.try_iter()?.map(|item| { + for item in resources.try_iter()? { let pair = item?.unbind(); let (key, value) = pair.extract::<(Bound, Bound)>(py)?; let uri = key.extract::()?; let schema = to_value(&value)?; - let resource = Resource::from_contents(schema); - Ok((uri, resource)) - }); - - let pairs: Result, PyErr> = pairs.collect(); + builder = builder + .add(uri, schema) + .map_err(|e| PyValueError::new_err(e.to_string()))?; + } - let registry = options - .build(pairs?) 
+ let registry = builder + .prepare() .map_err(|e| PyValueError::new_err(e.to_string()))?; Ok(Registry { inner: registry }) diff --git a/crates/jsonschema-py/tests-py/test_bundle.py b/crates/jsonschema-py/tests-py/test_bundle.py index 86579ee2..675e841e 100644 --- a/crates/jsonschema-py/tests-py/test_bundle.py +++ b/crates/jsonschema-py/tests-py/test_bundle.py @@ -43,6 +43,53 @@ def test_bundle_validates_identically(): assert not validator.is_valid({"age": 30}) +def test_bundle_with_registry_and_explicit_draft4_legacy_id_root(): + root = { + "id": "urn:root", + "type": "object", + "properties": {"value": {"$ref": "urn:string"}}, + "required": ["value"], + } + registry = jsonschema_rs.Registry( + resources=[("urn:string", {"type": "string"})], + draft=jsonschema_rs.Draft4, + ) + + bundled = jsonschema_rs.bundle(root, registry=registry, draft=jsonschema_rs.Draft4) + + assert bundled["properties"]["value"]["$ref"] == "urn:string" + assert "urn:string" in bundled["definitions"] + + +def test_bundle_uses_call_retriever_when_inline_root_adds_external_ref(): + def retrieve(uri: str): + if uri == "urn:external": + return {"type": "string"} + raise KeyError(f"Schema not found: {uri}") + + root = { + "type": "object", + "properties": {"value": {"$ref": "urn:external"}}, + "required": ["value"], + } + registry = jsonschema_rs.Registry(resources=[("urn:seed", {"type": "integer"})]) + + bundled = jsonschema_rs.bundle(root, registry=registry, retriever=retrieve) + + assert bundled["properties"]["value"]["$ref"] == "urn:external" + assert "urn:external" in bundled["$defs"] + + +def test_bundle_with_registry_accepts_equivalent_base_uri_with_empty_fragment(): + root = {"$id": "urn:root", "$ref": "urn:shared"} + registry = jsonschema_rs.Registry(resources=[("urn:shared", {"type": "integer"})]) + + bundled = jsonschema_rs.bundle(root, registry=registry, base_uri="urn:root#") + + assert bundled["$ref"] == "urn:shared" + assert bundled["$defs"]["urn:shared"]["type"] == "integer" + + 
def test_bundle_unresolvable_raises(): with pytest.raises(jsonschema_rs.ReferencingError): jsonschema_rs.bundle({"$ref": "https://example.com/missing.json"}) diff --git a/crates/jsonschema-py/tests-py/test_registry.py b/crates/jsonschema-py/tests-py/test_registry.py index d7984046..d240812d 100644 --- a/crates/jsonschema-py/tests-py/test_registry.py +++ b/crates/jsonschema-py/tests-py/test_registry.py @@ -85,7 +85,6 @@ def test_top_level_functions_with_registry(function): assert list(function(schema, VALID_PERSON, registry=registry)) == [] assert list(function(schema, INVALID_PERSON, registry=registry)) != [] - def test_validator_for_with_registry(): registry = Registry(NESTED_RESOURCES) schema = {"$ref": "https://example.com/person.json"} @@ -96,6 +95,21 @@ def test_validator_for_with_registry(): assert not validator.is_valid(INVALID_PERSON) +def test_validator_for_with_registry_and_explicit_draft4_legacy_id_root(): + registry = Registry([("urn:string", {"type": "string"})], draft=Draft4) + schema = { + "id": "urn:root", + "type": "object", + "properties": {"value": {"$ref": "urn:string"}}, + "required": ["value"], + } + + validator = Draft4Validator(schema, registry=registry) + + assert validator.is_valid({"value": "ok"}) + assert not validator.is_valid({"value": 42}) + + def test_registry_with_retriever_and_validation(): def retrieve(uri: str): if uri == "https://example.com/dynamic.json": @@ -118,6 +132,34 @@ def retrieve(uri: str): assert not dynamic_validator.is_valid("test") +def test_validator_for_uses_call_retriever_when_inline_root_adds_external_ref(): + def retrieve(uri: str): + if uri == "urn:external": + return {"type": "string"} + raise KeyError(f"Schema not found: {uri}") + + registry = Registry([("urn:seed", {"type": "integer"})]) + schema = { + "type": "object", + "properties": {"value": {"$ref": "urn:external"}}, + "required": ["value"], + } + + validator = validator_for(schema, registry=registry, retriever=retrieve) + assert 
validator.is_valid({"value": "ok"}) + assert not validator.is_valid({"value": 42}) + + +def test_validator_for_with_registry_accepts_equivalent_base_uri_with_empty_fragment(): + registry = Registry([("urn:shared", {"type": "integer"})]) + schema = {"$id": "urn:root", "$ref": "urn:shared"} + + validator = validator_for(schema, registry=registry, base_uri="urn:root#") + + assert validator.is_valid(1) + assert not validator.is_valid("x") + + def test_registry_error_propagation(): registry = Registry(NESTED_RESOURCES) diff --git a/crates/jsonschema-rb/spec/bundle_spec.rb b/crates/jsonschema-rb/spec/bundle_spec.rb index 924e1e47..2d2557c3 100644 --- a/crates/jsonschema-rb/spec/bundle_spec.rb +++ b/crates/jsonschema-rb/spec/bundle_spec.rb @@ -43,6 +43,23 @@ expect(validator.valid?({ "age" => 30 })).to be false end + it "bundles inline legacy-id root with registry and explicit draft4" do + root = { + "id" => "urn:root", + "type" => "object", + "properties" => { "value" => { "$ref" => "urn:string" } }, + "required" => ["value"] + } + registry = JSONSchema::Registry.new( + [["urn:string", { "type" => "string" }]], + draft: :draft4 + ) + + bundled = JSONSchema.bundle(root, registry: registry, draft: :draft4) + expect(bundled.dig("properties", "value", "$ref")).to eq("urn:string") + expect(bundled.dig("definitions", "urn:string")).not_to be_nil + end + it "raises when a $ref cannot be resolved" do expect do JSONSchema.bundle({ "$ref" => "https://example.com/missing.json" }) diff --git a/crates/jsonschema-rb/src/lib.rs b/crates/jsonschema-rb/src/lib.rs index 27186f9e..494fca71 100644 --- a/crates/jsonschema-rb/src/lib.rs +++ b/crates/jsonschema-rb/src/lib.rs @@ -84,16 +84,21 @@ struct BuiltValidator { fn build_validator( ruby: &Ruby, options: ValidationOptions, + registry: Option<&jsonschema::Registry<'_>>, retriever: Option, callback_roots: CallbackRoots, compilation_roots: Arc, schema: &serde_json::Value, ) -> Result { - let validator = match retriever { - Some(ret) => 
options.with_retriever(ret).build(schema), - None => options.build(schema), + let mut options = match retriever { + Some(ret) => options.with_retriever(ret), + None => options, + }; + if let Some(registry) = registry { + options = options.with_registry(registry); } - .map_err(|error| { + + let validator = options.build(schema).map_err(|error| { if let jsonschema::error::ValidationErrorKind::Referencing(err) = error.kind() { if let Some(message) = retriever_error_message(err) { Error::new(ruby.exception_arg_error(), message) @@ -157,7 +162,7 @@ fn build_parsed_options( ruby: &Ruby, kw: ExtractedKwargs, draft_override: Option, -) -> Result { +) -> Result, Error> { let ( draft_val, validate_formats, @@ -832,6 +837,7 @@ fn validator_for(ruby: &Ruby, args: &[Value]) -> Result { } = build_validator( ruby, parsed.options, + parsed.registry, parsed.retriever, parsed.callback_roots, parsed.compilation_roots, @@ -853,7 +859,11 @@ fn bundle(ruby: &Ruby, args: &[Value]) -> Result { let json_schema = to_schema_value(ruby, schema)?; let parsed = build_parsed_options(ruby, kw, None)?; - match parsed.options.bundle(&json_schema) { + let mut options = parsed.options; + if let Some(registry) = parsed.registry { + options = options.with_registry(registry); + } + match options.bundle(&json_schema) { Ok(bundled) => ser::value_to_ruby(ruby, &bundled), Err(e @ jsonschema::ReferencingError::Unretrievable { .. 
}) => { Err(referencing_error(ruby, e.to_string())) @@ -888,6 +898,7 @@ fn is_valid(ruby: &Ruby, args: &[Value]) -> Result { } = build_validator( ruby, parsed.options, + parsed.registry, parsed.retriever, parsed.callback_roots, parsed.compilation_roots, @@ -927,6 +938,7 @@ fn validate(ruby: &Ruby, args: &[Value]) -> Result<(), Error> { } = build_validator( ruby, parsed.options, + parsed.registry, parsed.retriever, parsed.callback_roots, parsed.compilation_roots, @@ -978,6 +990,7 @@ fn each_error(ruby: &Ruby, args: &[Value]) -> Result { } = build_validator( ruby, parsed.options, + parsed.registry, parsed.retriever, parsed.callback_roots, parsed.compilation_roots, @@ -1073,6 +1086,7 @@ fn evaluate(ruby: &Ruby, args: &[Value]) -> Result { } = build_validator( ruby, parsed.options, + parsed.registry, parsed.retriever, parsed.callback_roots, parsed.compilation_roots, @@ -1126,6 +1140,7 @@ macro_rules! define_draft_validator { } = build_validator( ruby, parsed.options, + parsed.registry, parsed.retriever, parsed.callback_roots, parsed.compilation_roots, @@ -1201,9 +1216,9 @@ fn meta_is_valid(ruby: &Ruby, args: &[Value]) -> Result { let json_schema = to_schema_value(ruby, schema)?; - let result = if let Some(reg) = registry { + let result = if let Some(registry) = registry { jsonschema::meta::options() - .with_registry(reg.inner.clone()) + .with_registry(®istry.inner) .validate(&json_schema) } else { jsonschema::meta::validate(&json_schema) @@ -1230,9 +1245,9 @@ fn meta_validate(ruby: &Ruby, args: &[Value]) -> Result<(), Error> { let json_schema = to_schema_value(ruby, schema)?; - let result = if let Some(reg) = registry { + let result = if let Some(registry) = registry { jsonschema::meta::options() - .with_registry(reg.inner.clone()) + .with_registry(®istry.inner) .validate(&json_schema) } else { jsonschema::meta::validate(&json_schema) diff --git a/crates/jsonschema-rb/src/options.rs b/crates/jsonschema-rb/src/options.rs index 8bde03dc..e8d722f2 100644 --- 
a/crates/jsonschema-rb/src/options.rs +++ b/crates/jsonschema-rb/src/options.rs @@ -56,9 +56,10 @@ define_rb_intern!(static SYM_CALL: "call"); define_rb_intern!(static SYM_NEW: "new"); define_rb_intern!(static SYM_VALIDATE: "validate"); -pub struct ParsedOptions { +pub struct ParsedOptions<'i> { pub mask: Option, - pub options: jsonschema::ValidationOptions, + pub options: jsonschema::ValidationOptions<'i>, + pub registry: Option<&'i jsonschema::Registry<'static>>, pub retriever: Option, // Runtime callbacks invoked during `validator.*` calls (formats / custom keywords). // Retriever callbacks are used at build time and do not affect GVL behavior at runtime. @@ -420,8 +421,9 @@ pub fn make_options_from_kwargs( pattern_options_val: Option, email_options_val: Option, http_options_val: Option, -) -> Result { +) -> Result, Error> { let mut opts = jsonschema::options(); + let mut registry = None; let mut retriever = None; let retriever_was_provided = retriever_val.is_some(); let mut has_ruby_callbacks = false; @@ -473,7 +475,7 @@ pub fn make_options_from_kwargs( "registry must be a JSONSchema::Registry instance", ) })?; - opts = opts.with_registry(reg.inner.clone()); + registry = Some(®.inner); if !retriever_was_provided && retriever.is_none() { if let Some(registry_retriever_value) = reg.retriever_value(ruby) { @@ -763,6 +765,7 @@ pub fn make_options_from_kwargs( Ok(ParsedOptions { mask, options: opts, + registry, retriever, has_ruby_callbacks, callback_roots, diff --git a/crates/jsonschema-rb/src/registry.rs b/crates/jsonschema-rb/src/registry.rs index a6d9159d..2f4288ab 100644 --- a/crates/jsonschema-rb/src/registry.rs +++ b/crates/jsonschema-rb/src/registry.rs @@ -40,7 +40,7 @@ impl Drop for RetrieverBuildRootGuard { #[derive(magnus::TypedData)] #[magnus(class = "JSONSchema::Registry", free_immediately, size, mark)] pub struct Registry { - pub inner: jsonschema::Registry, + pub inner: jsonschema::Registry<'static>, retriever_root: Option>, } @@ -72,7 +72,7 @@ impl 
Registry { let draft_val = kw.optional.0.flatten(); let retriever_val = kw.optional.1; - let mut builder = jsonschema::Registry::options(); + let mut builder = jsonschema::Registry::new(); let mut retriever_root = None; let mut retriever_build_root = None; @@ -89,29 +89,26 @@ impl Registry { } } - let pairs: Vec<(String, jsonschema::Resource)> = resources - .into_iter() - .map(|item| { - let pair: RArray = TryConvert::try_convert(item)?; - if pair.len() != 2 { - return Err(Error::new( - ruby.exception_arg_error(), - "Each resource must be a [uri, schema] pair", - )); - } - let uri: String = pair.entry(0)?; - let schema_val: Value = pair.entry(1)?; - let schema = to_value(ruby, schema_val)?; - let resource = jsonschema::Resource::from_contents(schema); - Ok((uri, resource)) - }) - .collect::, Error>>()?; - // Keep the retriever proc GC-rooted for the entire build, because `build` // may call into retriever callbacks while traversing referenced resources. let _retriever_build_guard = RetrieverBuildRootGuard::new(retriever_build_root); + for item in resources { + let pair: RArray = TryConvert::try_convert(item)?; + if pair.len() != 2 { + return Err(Error::new( + ruby.exception_arg_error(), + "Each resource must be a [uri, schema] pair", + )); + } + let uri: String = pair.entry(0)?; + let schema_val: Value = pair.entry(1)?; + let schema = to_value(ruby, schema_val)?; + builder = builder + .add(uri, schema) + .map_err(|e| Error::new(ruby.exception_arg_error(), e.to_string()))?; + } let registry = builder - .build(pairs) + .prepare() .map_err(|e| Error::new(ruby.exception_arg_error(), e.to_string()))?; Ok(Registry { diff --git a/crates/jsonschema-referencing/Cargo.toml b/crates/jsonschema-referencing/Cargo.toml index 3747795c..e1769f5e 100644 --- a/crates/jsonschema-referencing/Cargo.toml +++ b/crates/jsonschema-referencing/Cargo.toml @@ -22,10 +22,12 @@ ahash.workspace = true async-trait = { version = "0.1.86", optional = true } fluent-uri = { version = "0.4.1", 
features = ["serde"] } futures = { version = "0.3.31", optional = true } +itoa = "1" parking_lot = "0.12.3" percent-encoding = "2.3.1" serde_json.workspace = true hashbrown = "0.16" +micromap = "0.3.0" [dev-dependencies] benchmark = { path = "../benchmark/" } diff --git a/crates/jsonschema-referencing/benches/anchor.rs b/crates/jsonschema-referencing/benches/anchor.rs index fce92538..3b516213 100644 --- a/crates/jsonschema-referencing/benches/anchor.rs +++ b/crates/jsonschema-referencing/benches/anchor.rs @@ -14,9 +14,11 @@ fn bench_anchor_lookup(c: &mut Criterion) { } }); let resource = Draft::Draft4.create_resource(data); - let registry = - Registry::try_new("http://example.com/", resource).expect("Invalid registry input"); - + let registry = Registry::new() + .add("http://example.com/", resource) + .expect("Invalid registry input") + .prepare() + .expect("Invalid registry input"); let mut group = c.benchmark_group("Anchor Lookup"); // Benchmark lookup of existing anchor @@ -24,9 +26,9 @@ fn bench_anchor_lookup(c: &mut Criterion) { BenchmarkId::new("resolve", "small"), ®istry, |b, registry| { - let resolver = registry - .try_resolver("http://example.com/") - .expect("Invalid base URI"); + let resolver = registry.resolver( + referencing::uri::from_str("http://example.com/").expect("Invalid base URI"), + ); b.iter_with_large_drop(|| resolver.lookup(black_box("#foo"))); }, ); diff --git a/crates/jsonschema-referencing/benches/pointer.rs b/crates/jsonschema-referencing/benches/pointer.rs index abce3047..f46aa116 100644 --- a/crates/jsonschema-referencing/benches/pointer.rs +++ b/crates/jsonschema-referencing/benches/pointer.rs @@ -29,9 +29,11 @@ fn create_deep_nested_json(depth: usize) -> Value { fn bench_pointers(c: &mut Criterion) { let data = create_deep_nested_json(15); let resource = Draft::Draft202012.create_resource(data); - let registry = Registry::try_new("http://example.com/schema.json", resource) + let registry = Registry::new() + 
.add("http://example.com/schema.json", resource) + .expect("Invalid registry input") + .prepare() .expect("Invalid registry input"); - let cases = [ ("single", "#/properties"), ("double", "#/properties/level_0"), @@ -45,9 +47,10 @@ fn bench_pointers(c: &mut Criterion) { BenchmarkId::new("pointer", name), ®istry, |b, registry| { - let resolver = registry - .try_resolver("http://example.com/schema.json") - .expect("Invalid base URI"); + let resolver = registry.resolver( + referencing::uri::from_str("http://example.com/schema.json") + .expect("Invalid base URI"), + ); b.iter_with_large_drop(|| resolver.lookup(black_box(pointer))); }, ); diff --git a/crates/jsonschema-referencing/benches/registry.rs b/crates/jsonschema-referencing/benches/registry.rs index b8229755..308aa092 100644 --- a/crates/jsonschema-referencing/benches/registry.rs +++ b/crates/jsonschema-referencing/benches/registry.rs @@ -25,10 +25,12 @@ fn bench_subresources(c: &mut Criterion) { group.bench_with_input(BenchmarkId::new("try_new", name), &schema, |b, schema| { b.iter_batched( - || draft.create_resource(schema.clone()), + || draft.create_resource_ref(schema), |resource| { - Registry::try_new("http://example.com/schema.json", resource) + Registry::new() + .add("http://example.com/schema.json", resource) .expect("Invalid registry input") + .prepare() }, BatchSize::SmallInput, ); @@ -50,14 +52,12 @@ fn bench_subresources(c: &mut Criterion) { &schema, |b, schema| { b.iter_batched( - || { - ( - draft.create_resource(schema.clone()), - SPECIFICATIONS.clone(), - ) - }, + || (draft.create_resource_ref(schema), &*SPECIFICATIONS), |(resource, registry)| { - registry.try_with_resource("http://example.com/schema.json", resource) + registry + .add("http://example.com/schema.json", resource) + .expect("Invalid registry input") + .prepare() }, BatchSize::SmallInput, ); diff --git a/crates/jsonschema-referencing/src/anchors/mod.rs b/crates/jsonschema-referencing/src/anchors/mod.rs index ae595306..bc7b3248 100644 
--- a/crates/jsonschema-referencing/src/anchors/mod.rs +++ b/crates/jsonschema-referencing/src/anchors/mod.rs @@ -1,86 +1,33 @@ -use std::{ - hash::Hash, - sync::atomic::{AtomicPtr, Ordering}, -}; - use serde_json::Value; -mod keys; - -use crate::{resource::InnerResourcePtr, Draft, Error, Resolved, Resolver}; -pub(crate) use keys::{AnchorKey, AnchorKeyRef}; - -#[derive(Debug)] -pub(crate) struct AnchorName { - ptr: AtomicPtr, - len: usize, -} - -impl AnchorName { - fn new(s: &str) -> Self { - Self { - ptr: AtomicPtr::new(s.as_ptr().cast_mut()), - len: s.len(), - } - } - - #[allow(unsafe_code)] - fn as_str(&self) -> &str { - // SAFETY: The pointer is valid as long as the registry exists - unsafe { - std::str::from_utf8_unchecked(std::slice::from_raw_parts( - self.ptr.load(Ordering::Relaxed), - self.len, - )) - } - } -} - -impl Clone for AnchorName { - fn clone(&self) -> Self { - Self { - ptr: AtomicPtr::new(self.ptr.load(Ordering::Relaxed)), - len: self.len, - } - } -} - -impl Hash for AnchorName { - fn hash(&self, state: &mut H) { - self.as_str().hash(state); - } -} - -impl PartialEq for AnchorName { - fn eq(&self, other: &Self) -> bool { - self.as_str() == other.as_str() - } -} - -impl Eq for AnchorName {} +use crate::{Draft, Error, Resolved, Resolver, ResourceRef}; /// An anchor within a resource. -#[derive(Debug, Clone)] -pub(crate) enum Anchor { +#[derive(Debug, Clone, Copy)] +pub(crate) enum Anchor<'a> { Default { - name: AnchorName, - resource: InnerResourcePtr, + name: &'a str, + resource: ResourceRef<'a>, }, Dynamic { - name: AnchorName, - resource: InnerResourcePtr, + name: &'a str, + resource: ResourceRef<'a>, }, } -impl Anchor { +impl<'a> Anchor<'a> { /// Anchor's name. - pub(crate) fn name(&self) -> AnchorName { + #[inline] + pub(crate) fn name(&self) -> &'a str { match self { - Anchor::Default { name, .. } | Anchor::Dynamic { name, .. } => name.clone(), + Anchor::Default { name, .. } | Anchor::Dynamic { name, .. 
} => name, } } +} + +impl<'r> Anchor<'r> { /// Get the resource for this anchor. - pub(crate) fn resolve<'r>(&'r self, resolver: Resolver<'r>) -> Result, Error> { + pub(crate) fn resolve(&self, resolver: Resolver<'r>) -> Result, Error> { match self { Anchor::Default { resource, .. } => Ok(Resolved::new( resource.contents(), @@ -88,9 +35,9 @@ impl Anchor { resource.draft(), )), Anchor::Dynamic { name, resource } => { - let mut last = resource; + let mut last = *resource; for uri in &resolver.dynamic_scope() { - match resolver.registry.anchor(uri, name.as_str()) { + match resolver.lookup_anchor(uri, name) { Ok(anchor) => { if let Anchor::Dynamic { resource, .. } = anchor { last = resource; @@ -102,7 +49,7 @@ impl Anchor { } Ok(Resolved::new( last.contents(), - resolver.in_subresource_inner(last)?, + resolver.in_subresource(last)?, last.draft(), )) } @@ -110,14 +57,14 @@ impl Anchor { } } -pub(crate) enum AnchorIter { +pub(crate) enum AnchorIter<'a> { Empty, - One(Anchor), - Two(Anchor, Anchor), + One(Anchor<'a>), + Two(Anchor<'a>, Anchor<'a>), } -impl Iterator for AnchorIter { - type Item = Anchor; +impl<'a> Iterator for AnchorIter<'a> { + type Item = Anchor<'a>; fn next(&mut self) -> Option { match std::mem::replace(self, AnchorIter::Empty) { @@ -131,7 +78,7 @@ impl Iterator for AnchorIter { } } -pub(crate) fn anchor(draft: Draft, contents: &Value) -> AnchorIter { +pub(crate) fn anchor(draft: Draft, contents: &Value) -> AnchorIter<'_> { let Some(schema) = contents.as_object() else { return AnchorIter::Empty; }; @@ -142,16 +89,16 @@ pub(crate) fn anchor(draft: Draft, contents: &Value) -> AnchorIter { .get("$anchor") .and_then(Value::as_str) .map(|name| Anchor::Default { - name: AnchorName::new(name), - resource: InnerResourcePtr::new(contents, draft), + name, + resource: ResourceRef::new(contents, draft), }); let dynamic_anchor = schema .get("$dynamicAnchor") .and_then(Value::as_str) .map(|name| Anchor::Dynamic { - name: AnchorName::new(name), - resource: 
InnerResourcePtr::new(contents, draft), + name, + resource: ResourceRef::new(contents, draft), }); match (default_anchor, dynamic_anchor) { @@ -162,21 +109,21 @@ pub(crate) fn anchor(draft: Draft, contents: &Value) -> AnchorIter { } } -pub(crate) fn anchor_2019(draft: Draft, contents: &Value) -> AnchorIter { +pub(crate) fn anchor_2019(draft: Draft, contents: &Value) -> AnchorIter<'_> { match contents .as_object() .and_then(|schema| schema.get("$anchor")) .and_then(Value::as_str) { Some(name) => AnchorIter::One(Anchor::Default { - name: AnchorName::new(name), - resource: InnerResourcePtr::new(contents, draft), + name, + resource: ResourceRef::new(contents, draft), }), None => AnchorIter::Empty, } } -pub(crate) fn legacy_anchor_in_dollar_id(draft: Draft, contents: &Value) -> AnchorIter { +pub(crate) fn legacy_anchor_in_dollar_id(draft: Draft, contents: &Value) -> AnchorIter<'_> { match contents .as_object() .and_then(|schema| schema.get("$id")) @@ -184,14 +131,14 @@ pub(crate) fn legacy_anchor_in_dollar_id(draft: Draft, contents: &Value) -> Anch .and_then(|id| id.strip_prefix('#')) { Some(id) => AnchorIter::One(Anchor::Default { - name: AnchorName::new(id), - resource: InnerResourcePtr::new(contents, draft), + name: id, + resource: ResourceRef::new(contents, draft), }), None => AnchorIter::Empty, } } -pub(crate) fn legacy_anchor_in_id(draft: Draft, contents: &Value) -> AnchorIter { +pub(crate) fn legacy_anchor_in_id(draft: Draft, contents: &Value) -> AnchorIter<'_> { match contents .as_object() .and_then(|schema| schema.get("id")) @@ -199,8 +146,8 @@ pub(crate) fn legacy_anchor_in_id(draft: Draft, contents: &Value) -> AnchorIter .and_then(|id| id.strip_prefix('#')) { Some(id) => AnchorIter::One(Anchor::Default { - name: AnchorName::new(id), - resource: InnerResourcePtr::new(contents, draft), + name: id, + resource: ResourceRef::new(contents, draft), }), None => AnchorIter::Empty, } @@ -214,11 +161,13 @@ mod tests { #[test] fn test_lookup_trivial_dynamic_ref() { let 
one = Draft::Draft202012.create_resource(json!({"$dynamicAnchor": "foo"})); - let registry = - Registry::try_new("http://example.com", one.clone()).expect("Invalid resources"); + let registry = Registry::new() + .add("http://example.com", &one) + .expect("Invalid resources") + .prepare() + .expect("Invalid resources"); let resolver = registry - .try_resolver("http://example.com") - .expect("Invalid base URI"); + .resolver(crate::uri::from_str("http://example.com").expect("Invalid base URI")); let resolved = resolver.lookup("#foo").expect("Lookup failed"); assert_eq!(resolved.contents(), one.contents()); } @@ -243,15 +192,17 @@ mod tests { }, })); - let registry = Registry::try_from_resources([ - ("http://example.com".to_string(), root.clone()), - ("http://example.com/foo/".to_string(), true_resource), - ("http://example.com/foo/bar".to_string(), root.clone()), - ]) - .expect("Invalid resources"); + let registry = Registry::new() + .extend([ + ("http://example.com", &root), + ("http://example.com/foo/", &true_resource), + ("http://example.com/foo/bar", &root), + ]) + .expect("Invalid resources") + .prepare() + .expect("Invalid resources"); let resolver = registry - .try_resolver("http://example.com") - .expect("Invalid base URI"); + .resolver(crate::uri::from_str("http://example.com").expect("Invalid base URI")); let first = resolver.lookup("").expect("Lookup failed"); let second = first.resolver().lookup("foo/").expect("Lookup failed"); @@ -284,15 +235,17 @@ mod tests { }, })); - let registry = Registry::try_from_resources([ - ("http://example.com".to_string(), two.clone()), - ("http://example.com/foo/".to_string(), one), - ("http://example.com/foo/bar".to_string(), two.clone()), - ]) - .expect("Invalid resources"); + let registry = Registry::new() + .extend([ + ("http://example.com", &two), + ("http://example.com/foo/", &one), + ("http://example.com/foo/bar", &two), + ]) + .expect("Invalid resources") + .prepare() + .expect("Invalid resources"); let resolver = 
registry - .try_resolver("http://example.com") - .expect("Invalid base URI"); + .resolver(crate::uri::from_str("http://example.com").expect("Invalid base URI")); let first = resolver.lookup("").expect("Lookup failed"); let second = first.resolver().lookup("foo/").expect("Lookup failed"); @@ -311,14 +264,17 @@ mod tests { "foo": { "$anchor": "knownAnchor" } } })); - let registry = Registry::try_new("http://example.com", schema).expect("Invalid resources"); + let registry = Registry::new() + .add("http://example.com", schema) + .expect("Invalid resources") + .prepare() + .expect("Invalid resources"); let resolver = registry - .try_resolver("http://example.com") - .expect("Invalid base URI"); + .resolver(crate::uri::from_str("http://example.com").expect("Invalid base URI")); let result = resolver.lookup("#unknownAnchor"); assert_eq!( - result.unwrap_err().to_string(), + result.expect_err("Should fail").to_string(), "Anchor 'unknownAnchor' does not exist" ); } @@ -330,42 +286,49 @@ mod tests { "foo": { "$anchor": "knownAnchor" } } })); - let registry = Registry::try_new("http://example.com", schema).expect("Invalid resources"); + let registry = Registry::new() + .add("http://example.com", schema) + .expect("Invalid resources") + .prepare() + .expect("Invalid resources"); let resolver = registry - .try_resolver("http://example.com") - .expect("Invalid base URI"); + .resolver(crate::uri::from_str("http://example.com").expect("Invalid base URI")); let result = resolver.lookup("#invalid/anchor"); assert_eq!( - result.unwrap_err().to_string(), + result.expect_err("Should fail").to_string(), "Anchor 'invalid/anchor' is invalid" ); } #[test] fn test_lookup_trivial_recursive_ref() { - let one = Draft::Draft201909.create_resource(json!({"$recursiveAnchor": true})); - let registry = - Registry::try_new("http://example.com", one.clone()).expect("Invalid resources"); + let resource = Draft::Draft201909.create_resource(json!({"$recursiveAnchor": true})); + let registry = 
Registry::new() + .add("http://example.com", &resource) + .expect("Invalid resources") + .prepare() + .expect("Invalid resources"); let resolver = registry - .try_resolver("http://example.com") - .expect("Invalid base URI"); + .resolver(crate::uri::from_str("http://example.com").expect("Invalid base URI")); let first = resolver.lookup("").expect("Lookup failed"); let resolved = first .resolver() .lookup_recursive_ref() .expect("Lookup failed"); - assert_eq!(resolved.contents(), one.contents()); + assert_eq!(resolved.contents(), resource.contents()); } #[test] fn test_lookup_recursive_ref_to_bool() { let true_resource = Draft::Draft201909.create_resource(json!(true)); - let registry = Registry::try_new("http://example.com", true_resource.clone()) + let registry = Registry::new() + .add("http://example.com", &true_resource) + .expect("Invalid resources") + .prepare() .expect("Invalid resources"); let resolver = registry - .try_resolver("http://example.com") - .expect("Invalid base URI"); + .resolver(crate::uri::from_str("http://example.com").expect("Invalid base URI")); let resolved = resolver.lookup_recursive_ref().expect("Lookup failed"); assert_eq!(resolved.contents(), true_resource.contents()); } @@ -391,16 +354,17 @@ mod tests { }, })); - let registry = Registry::try_from_resources(vec![ - ("http://example.com".to_string(), root.clone()), - ("http://example.com/foo/".to_string(), true_resource), - ("http://example.com/foo/bar".to_string(), root.clone()), - ]) - .expect("Invalid resources"); - + let registry = Registry::new() + .extend([ + ("http://example.com", &root), + ("http://example.com/foo/", &true_resource), + ("http://example.com/foo/bar", &root), + ]) + .expect("Invalid resources") + .prepare() + .expect("Invalid resources"); let resolver = registry - .try_resolver("http://example.com") - .expect("Invalid base URI"); + .resolver(crate::uri::from_str("http://example.com").expect("Invalid base URI")); let first = resolver.lookup("").expect("Lookup 
failed"); let second = first.resolver().lookup("foo/").expect("Lookup failed"); let third = second.resolver().lookup("bar").expect("Lookup failed"); @@ -433,16 +397,17 @@ mod tests { })); let three = Draft::Draft201909.create_resource(json!({"$recursiveAnchor": false})); - let registry = Registry::try_from_resources(vec![ - ("http://example.com".to_string(), three), - ("http://example.com/foo/".to_string(), two.clone()), - ("http://example.com/foo/bar".to_string(), one), - ]) - .expect("Invalid resources"); - + let registry = Registry::new() + .extend([ + ("http://example.com", &three), + ("http://example.com/foo/", &two), + ("http://example.com/foo/bar", &one), + ]) + .expect("Invalid resources") + .prepare() + .expect("Invalid resources"); let resolver = registry - .try_resolver("http://example.com") - .expect("Invalid base URI"); + .resolver(crate::uri::from_str("http://example.com").expect("Invalid base URI")); let first = resolver.lookup("").expect("Lookup failed"); let second = first.resolver().lookup("foo/").expect("Lookup failed"); let third = second.resolver().lookup("bar").expect("Lookup failed"); diff --git a/crates/jsonschema-referencing/src/cache.rs b/crates/jsonschema-referencing/src/cache.rs index a7b3e1f6..d268d18d 100644 --- a/crates/jsonschema-referencing/src/cache.rs +++ b/crates/jsonschema-referencing/src/cache.rs @@ -140,10 +140,4 @@ impl SharedUriCache { Ok(inserted) } - - pub(crate) fn into_local(self) -> UriCache { - UriCache { - cache: self.cache.into_inner(), - } - } } diff --git a/crates/jsonschema-referencing/src/lib.rs b/crates/jsonschema-referencing/src/lib.rs index 3e9f2ed8..02b78d77 100644 --- a/crates/jsonschema-referencing/src/lib.rs +++ b/crates/jsonschema-referencing/src/lib.rs @@ -6,11 +6,13 @@ mod cache; mod error; mod list; pub mod meta; +mod path; mod registry; mod resolver; mod resource; mod retriever; mod segments; +mod small_map; mod specification; pub mod uri; mod vocabularies; @@ -19,7 +21,11 @@ pub(crate) use 
anchors::Anchor; pub use error::{Error, UriError}; pub use fluent_uri::{Iri, IriRef, Uri, UriRef}; pub use list::List; -pub use registry::{parse_index, pointer, Registry, RegistryOptions, SPECIFICATIONS}; +#[doc(hidden)] +pub use path::{write_escaped_str, write_index}; +pub use registry::{ + parse_index, pointer, IntoRegistryResource, Registry, RegistryBuilder, SPECIFICATIONS, +}; pub use resolver::{Resolved, Resolver}; pub use resource::{unescape_segment, Resource, ResourceRef}; pub use retriever::{DefaultRetriever, Retrieve}; diff --git a/crates/jsonschema-referencing/src/path.rs b/crates/jsonschema-referencing/src/path.rs new file mode 100644 index 00000000..036306f9 --- /dev/null +++ b/crates/jsonschema-referencing/src/path.rs @@ -0,0 +1,46 @@ +/// Escape a key into a JSON Pointer segment: `~` → `~0`, `/` → `~1`. +/// +/// Appends the escaped form of `value` directly to `buffer`. +pub fn write_escaped_str(buffer: &mut String, value: &str) { + match value.find(['~', '/']) { + Some(mut escape_idx) => { + let mut remaining = value; + + // Loop through the string to replace `~` and `/` + loop { + let (before, after) = remaining.split_at(escape_idx); + // Copy everything before the escape char + buffer.push_str(before); + + // Append the appropriate escape sequence + match after.as_bytes()[0] { + b'~' => buffer.push_str("~0"), + b'/' => buffer.push_str("~1"), + _ => unreachable!(), + } + + // Move past the escaped character + remaining = &after[1..]; + + // Find the next `~` or `/` to continue escaping + if let Some(next_escape_idx) = remaining.find(['~', '/']) { + escape_idx = next_escape_idx; + } else { + // Append any remaining part of the string + buffer.push_str(remaining); + break; + } + } + } + None => { + // If no escape characters are found, append the segment as is + buffer.push_str(value); + } + } +} + +#[inline] +pub fn write_index(buffer: &mut String, idx: usize) { + let mut itoa_buffer = itoa::Buffer::new(); + buffer.push_str(itoa_buffer.format(idx)); 
+} diff --git a/crates/jsonschema-referencing/src/registry.rs b/crates/jsonschema-referencing/src/registry.rs index 3e1e5b8b..efe4c178 100644 --- a/crates/jsonschema-referencing/src/registry.rs +++ b/crates/jsonschema-referencing/src/registry.rs @@ -1,7 +1,8 @@ use std::{ - collections::{hash_map::Entry, VecDeque}, + borrow::Cow, + collections::VecDeque, + fmt, num::NonZeroUsize, - pin::Pin, sync::{Arc, LazyLock}, }; @@ -10,226 +11,518 @@ use fluent_uri::{pct_enc::EStr, Uri}; use serde_json::Value; use crate::{ - anchors::{AnchorKey, AnchorKeyRef}, cache::{SharedUriCache, UriCache}, meta::{self, metas_for_draft}, - resource::{unescape_segment, InnerResourcePtr, JsonSchemaResource}, + resource::{unescape_segment, PathSegment, PathStack}, + small_map::SmallMap, uri, vocabularies::{self, VocabularySet}, Anchor, DefaultRetriever, Draft, Error, Resolver, Resource, ResourceRef, Retrieve, }; -/// An owned-or-refstatic wrapper for JSON `Value`. #[derive(Debug)] -pub(crate) enum ValueWrapper { - Owned(Value), - StaticRef(&'static Value), +struct StoredDocument<'a> { + value: Cow<'a, Value>, + draft: Draft, +} + +impl<'a> StoredDocument<'a> { + #[inline] + fn owned(value: Value, draft: Draft) -> Self { + Self { + value: Cow::Owned(value), + draft, + } + } + + #[inline] + fn borrowed(value: &'a Value, draft: Draft) -> Self { + Self { + value: Cow::Borrowed(value), + draft, + } + } + + #[inline] + fn contents(&self) -> &Value { + &self.value + } + + #[inline] + fn borrowed_contents(&self) -> Option<&'a Value> { + match &self.value { + Cow::Borrowed(value) => Some(value), + Cow::Owned(_) => None, + } + } + + #[inline] + fn draft(&self) -> Draft { + self.draft + } +} + +type DocumentStore<'a> = AHashMap>, Arc>>; +type AnchorKey = Box; + +#[derive(Debug, Clone, Default)] +struct PreparedIndex<'a> { + resources: SmallMap>, IndexedResource<'a>>, + anchors: SmallMap>, SmallMap>>, +} + +#[derive(Debug, Clone)] +enum IndexedResource<'a> { + Borrowed(ResourceRef<'a>), + Owned { + 
document: Arc>, + pointer: ParsedPointer, + draft: Draft, + }, +} + +impl IndexedResource<'_> { + #[inline] + fn resolve(&self) -> Option> { + match self { + IndexedResource::Borrowed(resource) => { + Some(ResourceRef::new(resource.contents(), resource.draft())) + } + IndexedResource::Owned { + document, + pointer, + draft, + } => { + let contents = pointer.lookup(document.contents())?; + Some(ResourceRef::new(contents, *draft)) + } + } + } +} + +type BorrowedAnchor<'a> = Anchor<'a>; + +#[derive(Debug, Clone)] +enum IndexedAnchor<'a> { + Borrowed(BorrowedAnchor<'a>), + Owned { + document: Arc>, + pointer: ParsedPointer, + draft: Draft, + kind: IndexedAnchorKind, + name: Box, + }, } -impl AsRef for ValueWrapper { - fn as_ref(&self) -> &Value { +impl IndexedAnchor<'_> { + #[inline] + fn resolve(&self) -> Option> { match self { - ValueWrapper::Owned(value) => value, - ValueWrapper::StaticRef(value) => value, + IndexedAnchor::Borrowed(anchor) => Some(match anchor { + Anchor::Default { name, resource } => Anchor::Default { + name, + resource: ResourceRef::new(resource.contents(), resource.draft()), + }, + Anchor::Dynamic { name, resource } => Anchor::Dynamic { + name, + resource: ResourceRef::new(resource.contents(), resource.draft()), + }, + }), + IndexedAnchor::Owned { + document, + pointer, + draft, + kind, + name, + } => { + let contents = pointer.lookup(document.contents())?; + let resource = ResourceRef::new(contents, *draft); + Some(match kind { + IndexedAnchorKind::Default => Anchor::Default { name, resource }, + IndexedAnchorKind::Dynamic => Anchor::Dynamic { name, resource }, + }) + } } } } -// SAFETY: `Pin` guarantees stable memory locations for resource pointers, -// while `Arc` enables cheap sharing between multiple registries -type DocumentStore = AHashMap>, Pin>>; -type ResourceMap = AHashMap>, InnerResourcePtr>; +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum IndexedAnchorKind { + Default, + Dynamic, +} + +#[derive(Debug, Clone, Default)] +struct 
ParsedPointer { + segments: Vec, +} + +impl ParsedPointer { + fn from_json_pointer(pointer: &str) -> Option { + if pointer.is_empty() { + return Some(Self::default()); + } + if !pointer.starts_with('/') { + return None; + } + + let mut segments = Vec::new(); + for token in pointer.split('/').skip(1).map(unescape_segment) { + if let Some(index) = parse_index(&token) { + segments.push(ParsedPointerSegment::Index(index)); + } else { + segments.push(ParsedPointerSegment::Key( + token.into_owned().into_boxed_str(), + )); + } + } + Some(Self { segments }) + } + + fn from_path_stack(path: &PathStack<'_>) -> Option { + let mut pointer = Self::from_json_pointer(path.base_pointer())?; + for segment in path.segments() { + match segment { + PathSegment::Key(key) => pointer + .segments + .push(ParsedPointerSegment::Key((*key).into())), + PathSegment::Index(index) => { + pointer.segments.push(ParsedPointerSegment::Index(*index)); + } + } + } + Some(pointer) + } + + fn lookup<'a>(&self, document: &'a Value) -> Option<&'a Value> { + self.segments + .iter() + .try_fold(document, |target, token| match token { + ParsedPointerSegment::Key(key) => match target { + Value::Object(map) => map.get(&**key), + _ => None, + }, + ParsedPointerSegment::Index(index) => match target { + Value::Array(list) => list.get(*index), + _ => None, + }, + }) + } +} + +#[derive(Debug, Clone)] +enum ParsedPointerSegment { + Key(Box), + Index(usize), +} /// Pre-loaded registry containing all JSON Schema meta-schemas and their vocabularies -pub static SPECIFICATIONS: LazyLock = +pub static SPECIFICATIONS: LazyLock> = LazyLock::new(|| Registry::build_from_meta_schemas(meta::META_SCHEMAS_ALL.as_slice())); /// A registry of JSON Schema resources, each identified by their canonical URIs. /// -/// Registries store a collection of in-memory resources and their anchors. -/// They eagerly process all added resources, including their subresources and anchors. 
-/// This means that subresources contained within any added resources are immediately -/// discoverable and retrievable via their own IDs. -/// -/// # Resource Retrieval -/// -/// Registry supports both blocking and non-blocking retrieval of external resources. -/// -/// ## Blocking Retrieval +/// `Registry` is a prepared registry: add resources with [`Registry::new`] and +/// [`RegistryBuilder::add`], then call [`RegistryBuilder::prepare`] to build the +/// reusable registry. To resolve `$ref` references directly, create a [`Resolver`] +/// from the prepared registry: /// /// ```rust -/// use referencing::{Registry, Resource, Retrieve, Uri}; -/// use serde_json::{json, Value}; +/// use referencing::Registry; /// -/// struct ExampleRetriever; +/// # fn main() -> Result<(), Box> { +/// let schema = serde_json::json!({ +/// "$schema": "https://json-schema.org/draft/2020-12/schema", +/// "$id": "https://example.com/root", +/// "$defs": { "item": { "type": "string" } }, +/// "items": { "$ref": "#/$defs/item" } +/// }); /// -/// impl Retrieve for ExampleRetriever { -/// fn retrieve( -/// &self, -/// uri: &Uri -/// ) -> Result> { -/// // Always return the same value for brevity -/// Ok(json!({"type": "string"})) -/// } -/// } +/// let registry = Registry::new() +/// .add("https://example.com/root", schema)? 
+/// .prepare()?; /// -/// # fn example() -> Result<(), Box> { -/// let registry = Registry::options() -/// .retriever(ExampleRetriever) -/// .build([ -/// // Initial schema that might reference external schemas -/// ( -/// "https://example.com/user.json", -/// Resource::from_contents(json!({ -/// "type": "object", -/// "properties": { -/// // Should be retrieved by `ExampleRetriever` -/// "role": {"$ref": "https://example.com/role.json"} -/// } -/// })) -/// ) -/// ])?; +/// let resolver = registry.resolver(referencing::uri::from_str("https://example.com/root")?); /// # Ok(()) /// # } /// ``` -/// -/// ## Non-blocking Retrieval -/// -/// ```rust -/// # #[cfg(feature = "retrieve-async")] -/// # mod example { -/// use referencing::{Registry, Resource, AsyncRetrieve, Uri}; -/// use serde_json::{json, Value}; -/// -/// struct ExampleRetriever; -/// -/// #[cfg_attr(target_family = "wasm", async_trait::async_trait(?Send))] -/// #[cfg_attr(not(target_family = "wasm"), async_trait::async_trait)] -/// impl AsyncRetrieve for ExampleRetriever { -/// async fn retrieve( -/// &self, -/// uri: &Uri -/// ) -> Result> { -/// // Always return the same value for brevity -/// Ok(json!({"type": "string"})) -/// } -/// } -/// -/// # async fn example() -> Result<(), Box> { -/// let registry = Registry::options() -/// .async_retriever(ExampleRetriever) -/// .build([ -/// ( -/// "https://example.com/user.json", -/// Resource::from_contents(json!({ -/// // Should be retrieved by `ExampleRetriever` -/// "$ref": "https://example.com/common/user.json" -/// })) -/// ) -/// ]) -/// .await?; -/// # Ok(()) -/// # } -/// # } -/// ``` -/// -/// The registry will automatically: -/// -/// - Resolve external references -/// - Cache retrieved schemas -/// - Handle nested references -/// - Process JSON Schema anchors -/// -#[derive(Debug)] -pub struct Registry { - documents: DocumentStore, - pub(crate) resources: ResourceMap, - anchors: AHashMap, +#[derive(Debug, Clone)] +pub struct Registry<'a> { + 
baseline: Option<&'a Registry<'a>>, resolution_cache: SharedUriCache, + known_resources: KnownResources, + index_data: PreparedIndex<'a>, } -impl Clone for Registry { - fn clone(&self) -> Self { - Self { - documents: self.documents.clone(), - resources: self.resources.clone(), - anchors: self.anchors.clone(), - resolution_cache: self.resolution_cache.clone(), - } +#[derive(Clone)] +pub struct RegistryBuilder<'a> { + baseline: Option<&'a Registry<'a>>, + pending: AHashMap, PendingResource<'a>>, + retriever: Arc, + #[cfg(feature = "retrieve-async")] + async_retriever: Option>, + draft: Option, +} + +impl fmt::Debug for RegistryBuilder<'_> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("RegistryBuilder") + .field("has_baseline", &self.baseline.is_some()) + .field("pending_len", &self.pending.len()) + .field("draft", &self.draft) + .finish() } } -/// Configuration options for creating a [`Registry`]. -pub struct RegistryOptions { - retriever: R, - draft: Draft, +#[derive(Clone)] +pub(crate) enum PendingResource<'a> { + OwnedValue(Value), + BorrowedValue(&'a Value), + OwnedResource(Resource), + BorrowedResource(ResourceRef<'a>), } -impl RegistryOptions { - /// Set specification version under which the resources should be interpreted under. - #[must_use] - pub fn draft(mut self, draft: Draft) -> Self { - self.draft = draft; - self +pub(crate) mod private { + use ahash::AHashMap; + use fluent_uri::Uri; + + use super::PendingResource; + + pub(crate) trait Sealed<'a> { + fn insert_into( + self, + pending: &mut AHashMap, PendingResource<'a>>, + uri: Uri, + ); } } -impl RegistryOptions> { - /// Create a new [`RegistryOptions`] with default settings. 
- #[must_use] - pub fn new() -> Self { +#[allow(private_bounds)] +pub trait IntoRegistryResource<'a>: private::Sealed<'a> {} + +impl<'a, T> IntoRegistryResource<'a> for T where T: private::Sealed<'a> {} + +impl<'a> private::Sealed<'a> for Resource { + fn insert_into( + self, + pending: &mut AHashMap, PendingResource<'a>>, + uri: Uri, + ) { + pending.insert(uri, PendingResource::OwnedResource(self)); + } +} + +impl<'a> private::Sealed<'a> for &'a Resource { + fn insert_into( + self, + pending: &mut AHashMap, PendingResource<'a>>, + uri: Uri, + ) { + pending.insert( + uri, + PendingResource::BorrowedResource(ResourceRef::new(self.contents(), self.draft())), + ); + } +} + +impl<'a> private::Sealed<'a> for &'a Value { + fn insert_into( + self, + pending: &mut AHashMap, PendingResource<'a>>, + uri: Uri, + ) { + pending.insert(uri, PendingResource::BorrowedValue(self)); + } +} + +impl<'a> private::Sealed<'a> for ResourceRef<'a> { + fn insert_into( + self, + pending: &mut AHashMap, PendingResource<'a>>, + uri: Uri, + ) { + pending.insert(uri, PendingResource::BorrowedResource(self)); + } +} + +impl<'a> private::Sealed<'a> for Value { + fn insert_into( + self, + pending: &mut AHashMap, PendingResource<'a>>, + uri: Uri, + ) { + pending.insert(uri, PendingResource::OwnedValue(self)); + } +} + +impl<'a> RegistryBuilder<'a> { + fn new() -> Self { + Self { + baseline: None, + pending: AHashMap::new(), + retriever: Arc::new(DefaultRetriever), + #[cfg(feature = "retrieve-async")] + async_retriever: None, + draft: None, + } + } + + fn from_registry(registry: &'a Registry<'a>) -> Self { Self { + baseline: Some(registry), + pending: AHashMap::new(), retriever: Arc::new(DefaultRetriever), - draft: Draft::default(), + #[cfg(feature = "retrieve-async")] + async_retriever: None, + draft: None, } } - /// Set a custom retriever for the [`Registry`]. 
+ + #[must_use] + pub fn draft(mut self, draft: Draft) -> Self { + self.draft = Some(draft); + self + } + #[must_use] pub fn retriever(mut self, retriever: impl IntoRetriever) -> Self { self.retriever = retriever.into_retriever(); self } - /// Set a custom async retriever for the [`Registry`]. + #[cfg(feature = "retrieve-async")] #[must_use] - pub fn async_retriever( + pub fn async_retriever(mut self, retriever: impl IntoAsyncRetriever) -> Self { + self.async_retriever = Some(retriever.into_retriever()); + self + } + + /// Add a resource to the registry builder. + /// + /// # Errors + /// + /// Returns an error if the URI is invalid. + pub fn add<'b>( self, - retriever: impl IntoAsyncRetriever, - ) -> RegistryOptions> { - RegistryOptions { - retriever: retriever.into_retriever(), + uri: impl AsRef, + resource: impl IntoRegistryResource<'b>, + ) -> Result, Error> + where + 'a: 'b, + { + let parsed = uri::from_str(uri.as_ref().trim_end_matches('#'))?; + let mut pending: AHashMap, PendingResource<'b>> = + self.pending.into_iter().collect(); + private::Sealed::insert_into(resource, &mut pending, parsed); + Ok(RegistryBuilder { + baseline: self.baseline, + pending, + retriever: self.retriever, + #[cfg(feature = "retrieve-async")] + async_retriever: self.async_retriever, + draft: self.draft, + }) + } + + /// Add multiple resources to the registry builder. + /// + /// # Errors + /// + /// Returns an error if any URI is invalid. + pub fn extend<'b, I, U, T>(self, pairs: I) -> Result, Error> + where + 'a: 'b, + I: IntoIterator, + U: AsRef, + T: IntoRegistryResource<'b>, + { + let mut builder = RegistryBuilder { + baseline: self.baseline, + pending: self.pending.into_iter().collect(), + retriever: self.retriever, + #[cfg(feature = "retrieve-async")] + async_retriever: self.async_retriever, draft: self.draft, + }; + for (uri, resource) in pairs { + builder = builder.add(uri, resource)?; } + Ok(builder) } - /// Create a [`Registry`] from multiple resources using these options. 
+ + /// Prepare the registry for reuse. /// /// # Errors /// - /// Returns an error if: - /// - Any URI is invalid - /// - Any referenced resources cannot be retrieved - pub fn build( - self, - pairs: impl IntoIterator, Resource)>, - ) -> Result { - Registry::try_from_resources_impl(pairs, &*self.retriever, self.draft) + /// Returns an error if URI processing, retrieval, or custom meta-schema validation fails. + pub fn prepare(self) -> Result, Error> { + if let Some(baseline) = self.baseline { + baseline.try_with_pending_resources_and_retriever( + self.pending, + &*self.retriever, + self.draft, + ) + } else { + Registry::try_from_pending_resources_impl(self.pending, &*self.retriever, self.draft) + } + } + + #[cfg(feature = "retrieve-async")] + /// Prepare the registry for reuse with async retrieval. + /// + /// # Errors + /// + /// Returns an error if URI processing, retrieval, or custom meta-schema validation fails. + pub async fn async_prepare(self) -> Result, Error> { + let retriever = self + .async_retriever + .unwrap_or_else(|| Arc::new(DefaultRetriever)); + if let Some(baseline) = self.baseline { + baseline + .try_with_pending_resources_and_retriever_async( + self.pending, + &*retriever, + self.draft, + ) + .await + } else { + Registry::try_from_pending_resources_async_impl(self.pending, &*retriever, self.draft) + .await + } } } -#[cfg(feature = "retrieve-async")] -impl RegistryOptions> { - /// Create a [`Registry`] from multiple resources using these options with async retrieval. +impl<'a> Registry<'a> { + /// Add a resource to a prepared registry, returning a builder that must be prepared again. /// /// # Errors /// - /// Returns an error if: - /// - Any URI is invalid - /// - Any referenced resources cannot be retrieved - pub async fn build( - self, - pairs: impl IntoIterator, Resource)>, - ) -> Result { - Registry::try_from_resources_async_impl(pairs, &*self.retriever, self.draft).await + /// Returns an error if the URI is invalid. 
+ pub fn add<'b>( + &'b self, + uri: impl AsRef, + resource: impl IntoRegistryResource<'b>, + ) -> Result, Error> + where + 'a: 'b, + { + RegistryBuilder::from_registry(self).add(uri, resource) + } + + /// Add multiple resources to a prepared registry, returning a builder that + /// must be prepared again. + /// + /// # Errors + /// + /// Returns an error if any URI is invalid. + pub fn extend<'b, I, U, T>(&'b self, pairs: I) -> Result, Error> + where + 'a: 'b, + I: IntoIterator, + U: AsRef, + T: IntoRegistryResource<'b>, + { + RegistryBuilder::from_registry(self).extend(pairs) } } @@ -268,247 +561,157 @@ impl IntoAsyncRetriever for Arc { } } -impl Default for RegistryOptions> { - fn default() -> Self { - Self::new() - } -} - -impl Registry { - /// Get [`RegistryOptions`] for configuring a new [`Registry`]. +impl Registry<'static> { + #[allow(clippy::new_ret_no_self)] #[must_use] - pub fn options() -> RegistryOptions> { - RegistryOptions::new() - } - /// Create a new [`Registry`] with a single resource. - /// - /// # Arguments - /// - /// * `uri` - The URI of the resource. - /// * `resource` - The resource to add. - /// - /// # Errors - /// - /// Returns an error if the URI is invalid or if there's an issue processing the resource. - pub fn try_new(uri: impl AsRef, resource: Resource) -> Result { - Self::try_new_impl(uri, resource, &DefaultRetriever, Draft::default()) - } - /// Create a new [`Registry`] from an iterator of (URI, Resource) pairs. - /// - /// # Arguments - /// - /// * `pairs` - An iterator of (URI, Resource) pairs. - /// - /// # Errors - /// - /// Returns an error if any URI is invalid or if there's an issue processing the resources. 
- pub fn try_from_resources( - pairs: impl IntoIterator, Resource)>, - ) -> Result { - Self::try_from_resources_impl(pairs, &DefaultRetriever, Draft::default()) - } - fn try_new_impl( - uri: impl AsRef, - resource: Resource, - retriever: &dyn Retrieve, - draft: Draft, - ) -> Result { - Self::try_from_resources_impl([(uri, resource)], retriever, draft) + pub fn new<'a>() -> RegistryBuilder<'a> { + RegistryBuilder::new() } - fn try_from_resources_impl( - pairs: impl IntoIterator, Resource)>, + + fn try_from_pending_resources_impl<'a>( + pairs: impl IntoIterator, PendingResource<'a>)>, retriever: &dyn Retrieve, - draft: Draft, - ) -> Result { - let mut documents = AHashMap::new(); - let mut resources = ResourceMap::new(); - let mut anchors = AHashMap::new(); + draft: Option, + ) -> Result, Error> { + let mut documents = DocumentStore::new(); + let mut known_resources = KnownResources::new(); let mut resolution_cache = UriCache::new(); - let custom_metaschemas = process_resources( + + let (custom_metaschemas, index_data) = process_resources_mixed( pairs, retriever, &mut documents, - &mut resources, - &mut anchors, + &mut known_resources, &mut resolution_cache, draft, )?; - // Validate that all custom $schema references are registered - validate_custom_metaschemas(&custom_metaschemas, &resources)?; + validate_custom_metaschemas(&custom_metaschemas, &known_resources)?; Ok(Registry { - documents, - resources, - anchors, + baseline: None, resolution_cache: resolution_cache.into_shared(), + known_resources, + index_data, }) } - /// Create a new [`Registry`] from an iterator of (URI, Resource) pairs using an async retriever. - /// - /// # Arguments - /// - /// * `pairs` - An iterator of (URI, Resource) pairs. - /// - /// # Errors - /// - /// Returns an error if any URI is invalid or if there's an issue processing the resources. 
+ #[cfg(feature = "retrieve-async")] - async fn try_from_resources_async_impl( - pairs: impl IntoIterator, Resource)>, + async fn try_from_pending_resources_async_impl<'a>( + pairs: impl IntoIterator, PendingResource<'a>)>, retriever: &dyn crate::AsyncRetrieve, - draft: Draft, - ) -> Result { - let mut documents = AHashMap::new(); - let mut resources = ResourceMap::new(); - let mut anchors = AHashMap::new(); + draft: Option, + ) -> Result, Error> { + let mut documents = DocumentStore::new(); + let mut known_resources = KnownResources::new(); let mut resolution_cache = UriCache::new(); - let custom_metaschemas = process_resources_async( + let (custom_metaschemas, index_data) = process_resources_async_mixed( pairs, retriever, &mut documents, - &mut resources, - &mut anchors, + &mut known_resources, &mut resolution_cache, draft, ) .await?; - // Validate that all custom $schema references are registered - validate_custom_metaschemas(&custom_metaschemas, &resources)?; + validate_custom_metaschemas(&custom_metaschemas, &known_resources)?; Ok(Registry { - documents, - resources, - anchors, + baseline: None, resolution_cache: resolution_cache.into_shared(), + known_resources, + index_data, }) } - /// Create a new registry with a new resource. - /// - /// # Errors - /// - /// Returns an error if the URI is invalid or if there's an issue processing the resource. - pub fn try_with_resource( - self, - uri: impl AsRef, - resource: Resource, - ) -> Result { - let draft = resource.draft(); - self.try_with_resources([(uri, resource)], draft) - } - /// Create a new registry with new resources. - /// - /// # Errors - /// - /// Returns an error if any URI is invalid or if there's an issue processing the resources. - pub fn try_with_resources( - self, - pairs: impl IntoIterator, Resource)>, - draft: Draft, - ) -> Result { - self.try_with_resources_and_retriever(pairs, &DefaultRetriever, draft) + + /// Build a registry with all the given meta-schemas from specs. 
+ pub(crate) fn build_from_meta_schemas(schemas: &[(&'static str, &'static Value)]) -> Self { + let mut documents = DocumentStore::with_capacity(schemas.len()); + let mut known_resources = KnownResources::with_capacity(schemas.len()); + + for (uri, schema) in schemas { + let parsed = + uri::from_str(uri.trim_end_matches('#')).expect("meta-schema URI must be valid"); + let key = Arc::new(parsed); + let draft = Draft::default().detect(schema); + known_resources.insert((*key).clone()); + documents.insert(key, Arc::new(StoredDocument::borrowed(schema, draft))); + } + + let mut resolution_cache = UriCache::with_capacity(35); + let index_data = build_prepared_index_for_documents(&documents, &mut resolution_cache) + .expect("meta-schema index data must build"); + + Self { + baseline: None, + resolution_cache: resolution_cache.into_shared(), + known_resources, + index_data, + } } - /// Create a new registry with new resources and using the given retriever. - /// - /// # Errors - /// - /// Returns an error if any URI is invalid or if there's an issue processing the resources. 
- pub fn try_with_resources_and_retriever( - self, - pairs: impl IntoIterator, Resource)>, +} + +impl<'a> Registry<'a> { + fn try_with_pending_resources_and_retriever( + &'a self, + pairs: impl IntoIterator, PendingResource<'a>)>, retriever: &dyn Retrieve, - draft: Draft, - ) -> Result { - let mut documents = self.documents; - let mut resources = self.resources; - let mut anchors = self.anchors; - let mut resolution_cache = self.resolution_cache.into_local(); - let custom_metaschemas = process_resources( + draft: Option, + ) -> Result, Error> { + let mut documents = DocumentStore::new(); + let mut resolution_cache = UriCache::new(); + let mut known_resources = self.known_resources.clone(); + + let (custom_metaschemas, index_data) = process_resources_mixed( pairs, retriever, &mut documents, - &mut resources, - &mut anchors, + &mut known_resources, &mut resolution_cache, draft, )?; - validate_custom_metaschemas(&custom_metaschemas, &resources)?; + validate_custom_metaschemas(&custom_metaschemas, &known_resources)?; + Ok(Registry { - documents, - resources, - anchors, + baseline: Some(self), resolution_cache: resolution_cache.into_shared(), + known_resources, + index_data, }) } - /// Create a new registry with new resources and using the given non-blocking retriever. - /// - /// # Errors - /// - /// Returns an error if any URI is invalid or if there's an issue processing the resources. 
+ #[cfg(feature = "retrieve-async")] - pub async fn try_with_resources_and_retriever_async( - self, - pairs: impl IntoIterator, Resource)>, + async fn try_with_pending_resources_and_retriever_async( + &'a self, + pairs: impl IntoIterator, PendingResource<'a>)>, retriever: &dyn crate::AsyncRetrieve, - draft: Draft, - ) -> Result { - let mut documents = self.documents; - let mut resources = self.resources; - let mut anchors = self.anchors; - let mut resolution_cache = self.resolution_cache.into_local(); - let custom_metaschemas = process_resources_async( + draft: Option, + ) -> Result, Error> { + let mut documents = DocumentStore::new(); + let mut resolution_cache = UriCache::new(); + let mut known_resources = self.known_resources.clone(); + + let (custom_metaschemas, index_data) = process_resources_async_mixed( pairs, retriever, &mut documents, - &mut resources, - &mut anchors, + &mut known_resources, &mut resolution_cache, draft, ) .await?; - validate_custom_metaschemas(&custom_metaschemas, &resources)?; + validate_custom_metaschemas(&custom_metaschemas, &known_resources)?; + Ok(Registry { - documents, - resources, - anchors, + baseline: Some(self), resolution_cache: resolution_cache.into_shared(), + known_resources, + index_data, }) } - /// Create a new [`Resolver`] for this registry with the given base URI. - /// - /// # Errors - /// - /// Returns an error if the base URI is invalid. - pub fn try_resolver(&self, base_uri: &str) -> Result, Error> { - let base = uri::from_str(base_uri)?; - Ok(self.resolver(base)) - } - /// Create a new [`Resolver`] for this registry with a known valid base URI. 
- #[must_use] - pub fn resolver(&self, base_uri: Uri) -> Resolver<'_> { - Resolver::new(self, Arc::new(base_uri)) - } - pub(crate) fn anchor<'a>(&self, uri: &'a Uri, name: &'a str) -> Result<&Anchor, Error> { - let key = AnchorKeyRef::new(uri, name); - if let Some(value) = self.anchors.get(key.borrow_dyn()) { - return Ok(value); - } - let resource = &self.resources[uri]; - if let Some(id) = resource.id() { - let uri = uri::from_str(id)?; - let key = AnchorKeyRef::new(&uri, name); - if let Some(value) = self.anchors.get(key.borrow_dyn()) { - return Ok(value); - } - } - if name.contains('/') { - Err(Error::invalid_anchor(name.to_string())) - } else { - Err(Error::no_such_anchor(name.to_string())) - } - } + /// Resolves a reference URI against a base URI using registry's cache. /// /// # Errors @@ -517,119 +720,217 @@ impl Registry { pub fn resolve_against(&self, base: &Uri<&str>, uri: &str) -> Result>, Error> { self.resolution_cache.resolve_against(base, uri) } - /// Returns vocabulary set configured for given draft and contents. - /// - /// For custom meta-schemas (`Draft::Unknown`), looks up the meta-schema in the registry - /// and extracts its `$vocabulary` declaration. If the meta-schema is not registered, - /// returns the default Draft 2020-12 vocabularies. 
+ + #[must_use] + pub fn contains_resource_uri(&self, uri: &str) -> bool { + let Ok(uri) = uri::from_str(uri) else { + return false; + }; + self.resource_by_uri(&uri).is_some() + } + + #[must_use] + pub fn contains_anchor(&self, uri: &str, name: &str) -> bool { + let Ok(uri) = uri::from_str(uri) else { + return false; + }; + self.contains_anchor_uri(&uri, name) + } + + #[must_use] + pub fn resolver(&self, base_uri: Uri) -> Resolver<'_> { + Resolver::new(self, Arc::new(base_uri)) + } + #[must_use] pub fn find_vocabularies(&self, draft: Draft, contents: &Value) -> VocabularySet { match draft.detect(contents) { Draft::Unknown => { - // Custom/unknown meta-schema - try to look it up in the registry if let Some(specification) = contents .as_object() .and_then(|obj| obj.get("$schema")) .and_then(|s| s.as_str()) { if let Ok(mut uri) = uri::from_str(specification) { - // Remove fragment for lookup (e.g., "http://example.com/schema#" -> "http://example.com/schema") - // Resources are stored without fragments, so we must strip it to find the meta-schema uri.set_fragment(None); - if let Some(resource) = self.resources.get(&uri) { - // Found the custom meta-schema - extract vocabularies + if let Some(resource) = self.resource_by_uri(&uri) { if let Ok(Some(vocabularies)) = vocabularies::find(resource.contents()) { return vocabularies; } } - // Meta-schema not registered - this will be caught during compilation - // For now, return default vocabularies to allow resource creation } } - // Default to Draft 2020-12 vocabularies for unknown meta-schemas Draft::Unknown.default_vocabularies() } draft => draft.default_vocabularies(), } } - /// Build a registry with all the given meta-schemas from specs. 
- pub(crate) fn build_from_meta_schemas(schemas: &[(&'static str, &'static Value)]) -> Self { - let schemas_count = schemas.len(); - let pairs = schemas - .iter() - .map(|(uri, schema)| (uri, ResourceRef::from_contents(schema))); - - let mut documents = DocumentStore::with_capacity(schemas_count); - let mut resources = ResourceMap::with_capacity(schemas_count); + #[inline] + pub(crate) fn resource_by_uri(&self, uri: &Uri) -> Option> { + self.index_data + .resources + .get(uri) + .and_then(IndexedResource::resolve) + .or_else(|| { + self.baseline + .and_then(|baseline| baseline.resource_by_uri(uri)) + }) + } - // The actual number of anchors and cache-entries varies across - // drafts. We overshoot here to avoid reallocations, using the sum - // over all specifications. - let mut anchors = AHashMap::with_capacity(8); - let mut resolution_cache = UriCache::with_capacity(35); + pub(crate) fn contains_anchor_uri(&self, uri: &Uri, name: &str) -> bool { + self.index_data + .anchors + .get(uri) + .is_some_and(|entries| entries.contains_key(name)) + || self + .baseline + .is_some_and(|baseline| baseline.contains_anchor_uri(uri, name)) + } - process_meta_schemas( - pairs, - &mut documents, - &mut resources, - &mut anchors, - &mut resolution_cache, - ) - .expect("Failed to process meta schemas"); + fn local_anchor_by_uri(&self, uri: &Uri, name: &str) -> Option> { + self.index_data + .anchors + .get(uri) + .and_then(|entries| entries.get(name)) + .and_then(IndexedAnchor::resolve) + } - Self { - documents, - resources, - anchors, - resolution_cache: resolution_cache.into_shared(), - } + fn anchor_exact(&self, uri: &Uri, name: &str) -> Option> { + self.local_anchor_by_uri(uri, name).or_else(|| { + self.baseline + .and_then(|baseline| baseline.anchor_exact(uri, name)) + }) } -} -fn process_meta_schemas( - pairs: impl IntoIterator, ResourceRef<'static>)>, - documents: &mut DocumentStore, - resources: &mut ResourceMap, - anchors: &mut AHashMap, - resolution_cache: &mut UriCache, 
-) -> Result<(), Error> { - let mut queue = VecDeque::with_capacity(32); + pub(crate) fn anchor(&self, uri: &Uri, name: &str) -> Result, Error> { + if let Some(anchor) = self.anchor_exact(uri, name) { + return Ok(anchor); + } - for (uri, resource) in pairs { - let uri = uri::from_str(uri.as_ref().trim_end_matches('#'))?; - let key = Arc::new(uri); - let contents: &'static Value = resource.contents(); - let wrapped_value = Arc::pin(ValueWrapper::StaticRef(contents)); - let resource = InnerResourcePtr::new((*wrapped_value).as_ref(), resource.draft()); - documents.insert(Arc::clone(&key), wrapped_value); - resources.insert(Arc::clone(&key), resource.clone()); - queue.push_back((key, resource)); - } - - // Process current queue and collect references to external resources - while let Some((mut base, resource)) = queue.pop_front() { - if let Some(id) = resource.id() { - base = resolution_cache.resolve_against(&base.borrow(), id)?; - resources.insert(base.clone(), resource.clone()); + if let Some(resource) = self.resource_by_uri(uri) { + if let Some(id) = resource.id() { + let canonical = uri::from_str(id)?; + if let Some(anchor) = self.anchor_exact(&canonical, name) { + return Ok(anchor); + } + } } - // Look for anchors - for anchor in resource.anchors() { - anchors.insert(AnchorKey::new(base.clone(), anchor.name()), anchor); + if name.contains('/') { + Err(Error::invalid_anchor(name.to_string())) + } else { + Err(Error::no_such_anchor(name.to_string())) } + } +} - // Process subresources - for contents in resource.draft().subresources_of(resource.contents()) { - let subresource_draft = resource.draft().detect(contents); - let subresource = InnerResourcePtr::new(contents, subresource_draft); - queue.push_back((base.clone(), subresource)); +/// Build prepared local index data for all documents already in `documents`. +/// Used by `build_from_meta_schemas` for the static SPECIFICATIONS registry. 
+fn build_prepared_index_for_documents<'a>( + documents: &DocumentStore<'a>, + resolution_cache: &mut UriCache, +) -> Result, Error> { + let mut index_data = PreparedIndex::default(); + for (doc_uri, document) in documents { + insert_root_index_entries(&mut index_data, doc_uri, document); + let root = document.contents(); + let borrowed_root = document.borrowed_contents(); + let initial_base = Arc::clone(doc_uri); + // Stack: (base_uri, json_pointer_from_root, draft) + let mut work: Vec<(Arc>, String, Draft)> = + vec![(initial_base, String::new(), document.draft())]; + while let Some((base, ptr_str, draft)) = work.pop() { + let contents = if ptr_str.is_empty() { + root + } else { + match pointer(root, &ptr_str) { + Some(v) => v, + None => continue, + } + }; + let original_base = Arc::clone(&base); + let mut current_base = base; + let (id, has_anchors) = draft.id_and_has_anchors(contents); + let is_root_entry = ptr_str.is_empty(); + let Some(parsed_pointer) = ParsedPointer::from_json_pointer(&ptr_str) else { + continue; + }; + if let Some(id) = id { + current_base = resolve_id(¤t_base, id, resolution_cache)?; + let insert_resource = current_base != original_base; + if !(is_root_entry && current_base == *doc_uri) && (insert_resource || has_anchors) + { + if let Some(root) = borrowed_root { + insert_borrowed_discovered_index_entries( + &mut index_data, + ¤t_base, + draft, + insert_resource, + if ptr_str.is_empty() { + root + } else { + pointer(root, &ptr_str) + .expect("borrowed root contents were already resolved") + }, + ); + } else { + insert_owned_discovered_index_entries( + &mut index_data, + ¤t_base, + document, + &parsed_pointer, + draft, + insert_resource, + contents, + ); + } + } + } else if has_anchors && !is_root_entry { + if let Some(root) = borrowed_root { + insert_borrowed_discovered_index_entries( + &mut index_data, + ¤t_base, + draft, + false, + if ptr_str.is_empty() { + root + } else { + pointer(root, &ptr_str) + .expect("borrowed root contents were 
already resolved") + }, + ); + } else { + insert_owned_discovered_index_entries( + &mut index_data, + ¤t_base, + document, + &parsed_pointer, + draft, + false, + contents, + ); + } + } + // Push children with their absolute paths + let base_for_children = Arc::clone(¤t_base); + let mut path = PathStack::from_base(ptr_str); + let _ = draft.walk_subresources_with_path( + contents, + &mut path, + &mut |p, _child, child_draft| { + work.push((Arc::clone(&base_for_children), p.to_pointer(), child_draft)); + Ok::<(), Error>(()) + }, + ); } } - Ok(()) + Ok(index_data) } +type KnownResources = AHashSet>; + #[derive(Hash, Eq, PartialEq)] struct ReferenceKey { base_ptr: NonZeroUsize, @@ -648,6 +949,27 @@ impl ReferenceKey { type ReferenceTracker = AHashSet; +/// Allocation-free local-ref deduplication: stores (`base_arc_ptr`, &`str_borrowed_from_json`). +type LocalSeen<'a> = AHashSet<(NonZeroUsize, &'a str)>; + +/// Clears a [`LocalSeen`] set and reinterprets it with a different borrow lifetime, +/// reusing the backing heap allocation across processing phases. +/// +/// # Safety +/// - The set is cleared before the lifetime change, so no `'a` references remain live. +/// - `(NonZeroUsize, &'a str)` and `(NonZeroUsize, &'b str)` have identical memory layouts +/// for any two lifetimes (`&str` is a fat pointer whose size/alignment are lifetime-independent). +/// - After `clear()` the heap allocation holds no initialized `T` values, so no pointer in +/// the allocation is ever read through the wrong lifetime. +/// - Verified under MIRI (tree borrows): no undefined behaviour detected. +#[allow(unsafe_code)] +#[inline] +unsafe fn reuse_local_seen<'b>(mut s: LocalSeen<'_>) -> LocalSeen<'b> { + s.clear(); + // SAFETY: see above — layouts identical, no live 'a refs after clear() + std::mem::transmute(s) +} + #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] enum ReferenceKind { Ref, @@ -655,24 +977,144 @@ enum ReferenceKind { } /// An entry in the processing queue. 
-/// The optional third element is the document root URI, used when the resource -/// was extracted from a fragment of a larger document. Local `$ref`s need to be -/// resolved against the document root, not just the fragment content. -type QueueEntry = (Arc>, InnerResourcePtr, Option>>); +/// `(base_uri, document_root_uri, pointer, draft)` +/// +/// `pointer` is a JSON Pointer relative to the document root (`""` means root). +/// Local `$ref`s are always resolved against the document root. +type QueueEntry = (Arc>, Arc>, String, Draft); + +fn insert_borrowed_anchor_entries<'a>( + index_data: &mut PreparedIndex<'a>, + uri: &Arc>, + draft: Draft, + contents: &'a Value, +) { + let anchors = index_data.anchors.get_or_insert_default(Arc::clone(uri)); + for anchor in draft.anchors(contents) { + anchors.insert( + anchor.name().to_string().into_boxed_str(), + IndexedAnchor::Borrowed(anchor), + ); + } +} + +fn insert_owned_anchor_entries<'a>( + index_data: &mut PreparedIndex<'a>, + uri: &Arc>, + document: &Arc>, + pointer: &ParsedPointer, + draft: Draft, + contents: &Value, +) { + let anchors = index_data.anchors.get_or_insert_default(Arc::clone(uri)); + for anchor in draft.anchors(contents) { + let (name, kind) = match anchor { + Anchor::Default { name, .. } => (name, IndexedAnchorKind::Default), + Anchor::Dynamic { name, .. 
} => (name, IndexedAnchorKind::Dynamic), + }; + anchors.insert( + name.to_string().into_boxed_str(), + IndexedAnchor::Owned { + document: Arc::clone(document), + pointer: pointer.clone(), + draft, + kind, + name: name.to_string().into_boxed_str(), + }, + ); + } +} + +fn insert_root_index_entries<'a>( + index_data: &mut PreparedIndex<'a>, + doc_key: &Arc>, + document: &Arc>, +) { + if let Some(contents) = document.borrowed_contents() { + index_data.resources.insert( + Arc::clone(doc_key), + IndexedResource::Borrowed(ResourceRef::new(contents, document.draft())), + ); + insert_borrowed_anchor_entries(index_data, doc_key, document.draft(), contents); + } else { + let pointer = ParsedPointer::default(); + index_data.resources.insert( + Arc::clone(doc_key), + IndexedResource::Owned { + document: Arc::clone(document), + pointer: pointer.clone(), + draft: document.draft(), + }, + ); + insert_owned_anchor_entries( + index_data, + doc_key, + document, + &pointer, + document.draft(), + document.contents(), + ); + } +} -struct ProcessingState { +fn insert_borrowed_discovered_index_entries<'a>( + index_data: &mut PreparedIndex<'a>, + uri: &Arc>, + draft: Draft, + has_id: bool, + contents: &'a Value, +) { + if has_id { + index_data.resources.insert( + Arc::clone(uri), + IndexedResource::Borrowed(ResourceRef::new(contents, draft)), + ); + } + insert_borrowed_anchor_entries(index_data, uri, draft, contents); +} + +fn insert_owned_discovered_index_entries<'a>( + index_data: &mut PreparedIndex<'a>, + uri: &Arc>, + document: &Arc>, + pointer: &ParsedPointer, + draft: Draft, + has_id: bool, + contents: &Value, +) { + if has_id { + index_data.resources.insert( + Arc::clone(uri), + IndexedResource::Owned { + document: Arc::clone(document), + pointer: pointer.clone(), + draft, + }, + ); + } + insert_owned_anchor_entries(index_data, uri, document, pointer, draft, contents); +} + +struct ProcessingState<'a> { queue: VecDeque, seen: ReferenceTracker, external: AHashSet<(String, Uri, 
ReferenceKind)>, scratch: String, refers_metaschemas: bool, - custom_metaschemas: Vec>>, + custom_metaschemas: Vec, /// Tracks schema pointers we've visited during recursive external resource collection. /// This prevents infinite recursion when schemas reference each other. visited_schemas: AHashSet, + /// Deferred local-ref targets. During the main traversal, instead of calling + /// `collect_external_resources_recursive` immediately when a local `$ref` is found, + /// the target is pushed here. After `process_queue` completes (full document traversal), + /// subresource targets are already in `visited_schemas` and return in O(1); + /// non-subresource paths (e.g. `#/components/schemas/Foo`) are still fully traversed. + deferred_refs: Vec, + index_data: PreparedIndex<'a>, } -impl ProcessingState { +impl ProcessingState<'_> { fn new() -> Self { Self { queue: VecDeque::with_capacity(32), @@ -682,179 +1124,378 @@ impl ProcessingState { refers_metaschemas: false, custom_metaschemas: Vec::new(), visited_schemas: AHashSet::new(), + deferred_refs: Vec::new(), + index_data: PreparedIndex::default(), } } } -fn process_input_resources( - pairs: impl IntoIterator, Resource)>, - documents: &mut DocumentStore, - resources: &mut ResourceMap, - state: &mut ProcessingState, -) -> Result<(), Error> { +fn process_input_resources_mixed<'a>( + pairs: impl IntoIterator, PendingResource<'a>)>, + documents: &mut DocumentStore<'a>, + known_resources: &mut KnownResources, + state: &mut ProcessingState<'a>, + draft_override: Option, +) { for (uri, resource) in pairs { - let uri = uri::from_str(uri.as_ref().trim_end_matches('#'))?; let key = Arc::new(uri); - match documents.entry(Arc::clone(&key)) { - Entry::Occupied(_) => {} - Entry::Vacant(entry) => { + let draft = match &resource { + PendingResource::OwnedValue(value) => { + draft_override.unwrap_or_else(|| Draft::default().detect(value)) + } + PendingResource::BorrowedValue(value) => { + draft_override.unwrap_or_else(|| 
Draft::default().detect(value)) + } + PendingResource::OwnedResource(resource) => resource.draft(), + PendingResource::BorrowedResource(resource) => resource.draft(), + }; + + let r = Arc::new(match resource { + PendingResource::OwnedValue(value) => { + let (draft, contents) = draft.create_resource(value).into_inner(); + StoredDocument::owned(contents, draft) + } + PendingResource::BorrowedValue(value) => { + let resource = draft.create_resource_ref(value); + StoredDocument::borrowed(resource.contents(), resource.draft()) + } + PendingResource::OwnedResource(resource) => { let (draft, contents) = resource.into_inner(); - let wrapped_value = Arc::pin(ValueWrapper::Owned(contents)); - let resource = InnerResourcePtr::new((*wrapped_value).as_ref(), draft); - resources.insert(Arc::clone(&key), resource.clone()); + StoredDocument::owned(contents, draft) + } + PendingResource::BorrowedResource(resource) => { + StoredDocument::borrowed(resource.contents(), resource.draft()) + } + }); - // Track resources with custom meta-schemas for later validation - if draft == Draft::Unknown { - state.custom_metaschemas.push(Arc::clone(&key)); - } + documents.insert(Arc::clone(&key), Arc::clone(&r)); + known_resources.insert((*key).clone()); + insert_root_index_entries(&mut state.index_data, &key, &r); - state.queue.push_back((key, resource, None)); - entry.insert(wrapped_value); + if draft == Draft::Unknown { + let contents = documents + .get(&key) + .expect("document was just inserted") + .contents(); + if let Some(meta_schema) = contents + .as_object() + .and_then(|obj| obj.get("$schema")) + .and_then(|schema| schema.as_str()) + { + state.custom_metaschemas.push(meta_schema.to_string()); } } + + state + .queue + .push_back((Arc::clone(&key), key, String::new(), draft)); } - Ok(()) } -fn process_queue( - state: &mut ProcessingState, - resources: &mut ResourceMap, - anchors: &mut AHashMap, +fn process_queue<'a, 'r>( + state: &mut ProcessingState<'r>, + documents: &'a 
DocumentStore<'r>, + known_resources: &mut KnownResources, resolution_cache: &mut UriCache, + local_seen: &mut LocalSeen<'a>, ) -> Result<(), Error> { - while let Some((mut base, resource, document_root_uri)) = state.queue.pop_front() { - if let Some(id) = resource.id() { - base = resolve_id(&base, id, resolution_cache)?; - resources.insert(base.clone(), resource.clone()); - } + while let Some((base, document_root_uri, pointer_path, draft)) = state.queue.pop_front() { + let Some(document) = documents.get(&document_root_uri) else { + continue; + }; + let root = document.contents(); + let borrowed_root = document.borrowed_contents(); + let borrowed_contents = if pointer_path.is_empty() { + borrowed_root + } else { + borrowed_root.and_then(|root| pointer(root, &pointer_path)) + }; + let Some(contents) = (if pointer_path.is_empty() { + Some(root) + } else { + pointer(root, &pointer_path) + }) else { + continue; + }; - for anchor in resource.anchors() { - anchors.insert(AnchorKey::new(base.clone(), anchor.name()), anchor); - } + let resource = ResourceRef::new(contents, draft); + let mut path = PathStack::from_base(pointer_path); + process_resource_tree( + base, + root, + borrowed_contents, + resource, + &mut path, + &document_root_uri, + document, + state, + known_resources, + resolution_cache, + local_seen, + )?; + } + Ok(()) +} - // Determine the document root for resolving local $refs. - // If document_root_uri is set (e.g., for fragment-extracted resources), - // look up the full document. Otherwise, this resource IS the document root. 
- let root = document_root_uri - .as_ref() - .and_then(|uri| resources.get(uri)) - .map_or_else(|| resource.contents(), InnerResourcePtr::contents); - - // Skip if already visited during local $ref resolution - let contents_ptr = std::ptr::from_ref::(resource.contents()) as usize; - if state.visited_schemas.insert(contents_ptr) { - collect_external_resources( +fn process_resource_tree<'a, 'r>( + mut base: Arc>, + root: &'a Value, + borrowed_contents: Option<&'r Value>, + resource: ResourceRef<'a>, + path: &mut PathStack<'a>, + doc_key: &Arc>, + document: &Arc>, + state: &mut ProcessingState<'r>, + known_resources: &mut KnownResources, + resolution_cache: &mut UriCache, + local_seen: &mut LocalSeen<'a>, +) -> Result<(), Error> { + let (id, has_anchors) = resource.draft().id_and_has_anchors(resource.contents()); + let is_root_entry = path.base_pointer().is_empty() && path.segments().is_empty(); + if let Some(id) = id { + let original_base = Arc::clone(&base); + base = resolve_id(&base, id, resolution_cache)?; + known_resources.insert((*base).clone()); + let insert_resource = base != original_base; + if is_root_entry && base == *doc_key { + // Root resource / anchors were already inserted under the storage URI. 
+ } else if let Some(contents) = borrowed_contents { + insert_borrowed_discovered_index_entries( + &mut state.index_data, &base, - root, - resource.contents(), - &mut state.external, - &mut state.seen, - resolution_cache, - &mut state.scratch, - &mut state.refers_metaschemas, resource.draft(), - &mut state.visited_schemas, - )?; + insert_resource, + contents, + ); + } else if insert_resource || has_anchors { + if let Some(pointer) = ParsedPointer::from_path_stack(path) { + insert_owned_discovered_index_entries( + &mut state.index_data, + &base, + document, + &pointer, + resource.draft(), + insert_resource, + resource.contents(), + ); + } } - - // Subresources inherit the document root URI, or use the current base if none set - let subresource_root_uri = document_root_uri.or_else(|| Some(base.clone())); - for contents in resource.draft().subresources_of(resource.contents()) { - let subresource_draft = resource.draft().detect(contents); - let subresource = InnerResourcePtr::new(contents, subresource_draft); - state - .queue - .push_back((base.clone(), subresource, subresource_root_uri.clone())); + } else if has_anchors { + if is_root_entry { + // Root anchors were already inserted under the storage URI. 
+ } else if let Some(contents) = borrowed_contents { + insert_borrowed_discovered_index_entries( + &mut state.index_data, + &base, + resource.draft(), + false, + contents, + ); + } else if let Some(pointer) = ParsedPointer::from_path_stack(path) { + insert_owned_discovered_index_entries( + &mut state.index_data, + &base, + document, + &pointer, + resource.draft(), + false, + resource.contents(), + ); } } - Ok(()) + + let contents_ptr = std::ptr::from_ref::(resource.contents()) as usize; + if state.visited_schemas.insert(contents_ptr) { + collect_external_resources( + &base, + root, + resource.contents(), + &mut state.external, + &mut state.seen, + resolution_cache, + &mut state.scratch, + &mut state.refers_metaschemas, + resource.draft(), + doc_key, + &mut state.deferred_refs, + local_seen, + )?; + } + + resource.draft().walk_subresources_with_path( + resource.contents(), + path, + &mut |child_path, child, child_draft| { + let borrowed_child = + borrowed_contents.and_then(|contents| match child_path.segments().last() { + Some(PathSegment::Key(key)) => match contents { + Value::Object(map) => map.get(*key), + _ => None, + }, + Some(PathSegment::Index(index)) => match contents { + Value::Array(list) => list.get(*index), + _ => None, + }, + None => Some(contents), + }); + process_resource_tree( + Arc::clone(&base), + root, + borrowed_child, + ResourceRef::new(child, child_draft), + child_path, + doc_key, + document, + state, + known_resources, + resolution_cache, + local_seen, + ) + }, + ) } -fn handle_fragment( +fn enqueue_fragment_entry( uri: &Uri, - resource: &InnerResourcePtr, key: &Arc>, default_draft: Draft, + documents: &DocumentStore<'_>, queue: &mut VecDeque, - document_root_uri: Arc>, ) { if let Some(fragment) = uri.fragment() { - if let Some(resolved) = pointer(resource.contents(), fragment.as_str()) { - let draft = default_draft.detect(resolved); - let contents = std::ptr::addr_of!(*resolved); - let resource = InnerResourcePtr::new(contents, draft); - 
queue.push_back((Arc::clone(key), resource, Some(document_root_uri))); + let Some(document) = documents.get(key) else { + return; + }; + if let Some(resolved) = pointer(document.contents(), fragment.as_str()) { + let fragment_draft = default_draft.detect(resolved); + queue.push_back(( + Arc::clone(key), + Arc::clone(key), + fragment.as_str().to_string(), + fragment_draft, + )); } } } -fn handle_metaschemas( +fn handle_metaschemas<'a>( refers_metaschemas: bool, - resources: &mut ResourceMap, - anchors: &mut AHashMap, + documents: &mut DocumentStore<'a>, + known_resources: &mut KnownResources, draft_version: Draft, -) { - if refers_metaschemas { - let schemas = metas_for_draft(draft_version); - let draft_registry = Registry::build_from_meta_schemas(schemas); - resources.reserve(draft_registry.resources.len()); - for (key, resource) in draft_registry.resources { - resources.insert(key, resource.clone()); - } - anchors.reserve(draft_registry.anchors.len()); - for (key, anchor) in draft_registry.anchors { - anchors.insert(key, anchor); + state: &mut ProcessingState<'a>, +) -> Result<(), Error> { + if !refers_metaschemas { + return Ok(()); + } + + let schemas = metas_for_draft(draft_version); + for (uri, schema) in schemas { + let key = Arc::new(uri::from_str(uri.trim_end_matches('#'))?); + if documents.contains_key(&key) { + continue; } + let draft = Draft::default().detect(schema); + documents.insert( + Arc::clone(&key), + Arc::new(StoredDocument::borrowed(schema, draft)), + ); + known_resources.insert((*key).clone()); + insert_root_index_entries( + &mut state.index_data, + &key, + documents + .get(&key) + .expect("meta-schema document was just inserted into the store"), + ); + state + .queue + .push_back((Arc::clone(&key), Arc::clone(&key), String::new(), draft)); } + Ok(()) } -fn create_resource( +fn create_resource<'a>( retrieved: Value, fragmentless: Uri, default_draft: Draft, - documents: &mut DocumentStore, - resources: &mut ResourceMap, - custom_metaschemas: 
&mut Vec>>, -) -> (Arc>, InnerResourcePtr) { + documents: &mut DocumentStore<'a>, + known_resources: &mut KnownResources, + index_data: &mut PreparedIndex<'a>, + custom_metaschemas: &mut Vec, +) -> (Arc>, Draft) { let draft = default_draft.detect(&retrieved); - let wrapped_value = Arc::pin(ValueWrapper::Owned(retrieved)); - let resource = InnerResourcePtr::new((*wrapped_value).as_ref(), draft); let key = Arc::new(fragmentless); - documents.insert(Arc::clone(&key), wrapped_value); - resources.insert(Arc::clone(&key), resource.clone()); + documents.insert( + Arc::clone(&key), + Arc::new(StoredDocument::owned(retrieved, draft)), + ); + + let contents = documents + .get(&key) + .expect("document was just inserted") + .contents(); + known_resources.insert((*key).clone()); + insert_root_index_entries( + index_data, + &key, + documents + .get(&key) + .expect("retrieved document was just inserted into the store"), + ); - // Track resources with custom meta-schemas for later validation if draft == Draft::Unknown { - custom_metaschemas.push(Arc::clone(&key)); + if let Some(meta_schema) = contents + .as_object() + .and_then(|obj| obj.get("$schema")) + .and_then(|schema| schema.as_str()) + { + custom_metaschemas.push(meta_schema.to_string()); + } } - (key, resource) + (key, draft) } -fn process_resources( - pairs: impl IntoIterator, Resource)>, - retriever: &dyn Retrieve, - documents: &mut DocumentStore, - resources: &mut ResourceMap, - anchors: &mut AHashMap, +/// Shared sync processing loop used during registry preparation. After the +/// initial input has been ingested into `state`, this function drives the +/// BFS-fetch cycle until all reachable external resources have been retrieved, +/// then handles meta-schema injection and runs a final queue pass. 
+#[allow(unsafe_code)] +fn run_sync_processing_loop<'a>( + state: &mut ProcessingState<'a>, + documents: &mut DocumentStore<'a>, + known_resources: &mut KnownResources, resolution_cache: &mut UriCache, default_draft: Draft, -) -> Result>>, Error> { - let mut state = ProcessingState::new(); - process_input_resources(pairs, documents, resources, &mut state)?; + retriever: &dyn Retrieve, +) -> Result<(), Error> { + let mut local_seen_buf: LocalSeen<'static> = LocalSeen::new(); loop { if state.queue.is_empty() && state.external.is_empty() { break; } - process_queue(&mut state, resources, anchors, resolution_cache)?; + { + // SAFETY: widens 'static → '_ (covariant); set is empty after reuse_local_seen clears it. + let mut local_seen: LocalSeen<'_> = unsafe { reuse_local_seen(local_seen_buf) }; + process_queue( + state, + documents, + known_resources, + resolution_cache, + &mut local_seen, + )?; + process_deferred_refs(state, documents, resolution_cache, &mut local_seen)?; + // SAFETY: clears all '_ refs before narrowing back to 'static to reclaim the buffer. 
+ local_seen_buf = unsafe { reuse_local_seen(local_seen) }; + } - // Retrieve external resources for (original, uri, kind) in state.external.drain() { let mut fragmentless = uri.clone(); fragmentless.set_fragment(None); - if !resources.contains_key(&fragmentless) { + if !known_resources.contains(&fragmentless) { let retrieved = match retriever.retrieve(&fragmentless) { Ok(retrieved) => retrieved, Err(error) => { @@ -863,63 +1504,146 @@ fn process_resources( } }; - let (key, resource) = create_resource( + let (key, draft) = create_resource( retrieved, fragmentless, default_draft, documents, - resources, + known_resources, + &mut state.index_data, &mut state.custom_metaschemas, ); - handle_fragment( - &uri, - &resource, - &key, - default_draft, - &mut state.queue, - Arc::clone(&key), - ); - state.queue.push_back((key, resource, None)); + enqueue_fragment_entry(&uri, &key, default_draft, documents, &mut state.queue); + state + .queue + .push_back((Arc::clone(&key), key, String::new(), draft)); } } } - handle_metaschemas(state.refers_metaschemas, resources, anchors, default_draft); + handle_metaschemas( + state.refers_metaschemas, + documents, + known_resources, + default_draft, + state, + )?; - Ok(state.custom_metaschemas) + if !state.queue.is_empty() { + // SAFETY: widens 'static → '_ (covariant); set is empty after reuse_local_seen clears it. 
+ let mut local_seen: LocalSeen<'_> = unsafe { reuse_local_seen(local_seen_buf) }; + process_queue( + state, + documents, + known_resources, + resolution_cache, + &mut local_seen, + )?; + process_deferred_refs(state, documents, resolution_cache, &mut local_seen)?; + } + + Ok(()) +} + +fn process_resources_mixed<'a>( + pairs: impl IntoIterator, PendingResource<'a>)>, + retriever: &dyn Retrieve, + documents: &mut DocumentStore<'a>, + known_resources: &mut KnownResources, + resolution_cache: &mut UriCache, + draft_override: Option, +) -> Result<(Vec, PreparedIndex<'a>), Error> { + let mut state = ProcessingState::new(); + process_input_resources_mixed( + pairs, + documents, + known_resources, + &mut state, + draft_override, + ); + run_sync_processing_loop( + &mut state, + documents, + known_resources, + resolution_cache, + draft_override.unwrap_or_default(), + retriever, + )?; + Ok((state.custom_metaschemas, state.index_data)) +} + +#[cfg(feature = "retrieve-async")] +async fn process_resources_async_mixed<'a>( + pairs: impl IntoIterator, PendingResource<'a>)>, + retriever: &dyn crate::AsyncRetrieve, + documents: &mut DocumentStore<'a>, + known_resources: &mut KnownResources, + resolution_cache: &mut UriCache, + draft_override: Option, +) -> Result<(Vec, PreparedIndex<'a>), Error> { + let mut state = ProcessingState::new(); + process_input_resources_mixed( + pairs, + documents, + known_resources, + &mut state, + draft_override, + ); + run_async_processing_loop( + &mut state, + documents, + known_resources, + resolution_cache, + draft_override.unwrap_or_default(), + retriever, + ) + .await?; + Ok((state.custom_metaschemas, state.index_data)) } +/// Shared async processing loop used during registry preparation. Batches +/// concurrent external retrievals with `join_all` and otherwise mirrors +/// [`run_sync_processing_loop`]. 
#[cfg(feature = "retrieve-async")] -async fn process_resources_async( - pairs: impl IntoIterator, Resource)>, - retriever: &dyn crate::AsyncRetrieve, - documents: &mut DocumentStore, - resources: &mut ResourceMap, - anchors: &mut AHashMap, +#[allow(unsafe_code)] +async fn run_async_processing_loop<'a>( + state: &mut ProcessingState<'a>, + documents: &mut DocumentStore<'a>, + known_resources: &mut KnownResources, resolution_cache: &mut UriCache, default_draft: Draft, -) -> Result>>, Error> { + retriever: &dyn crate::AsyncRetrieve, +) -> Result<(), Error> { type ExternalRefsByBase = AHashMap, Vec<(String, Uri, ReferenceKind)>>; - let mut state = ProcessingState::new(); - process_input_resources(pairs, documents, resources, &mut state)?; + let mut local_seen_buf: LocalSeen<'static> = LocalSeen::new(); loop { if state.queue.is_empty() && state.external.is_empty() { break; } - process_queue(&mut state, resources, anchors, resolution_cache)?; + { + // SAFETY: widens 'static → '_ (covariant); set is empty after reuse_local_seen clears it. + let mut local_seen: LocalSeen<'_> = unsafe { reuse_local_seen(local_seen_buf) }; + process_queue( + state, + documents, + known_resources, + resolution_cache, + &mut local_seen, + )?; + process_deferred_refs(state, documents, resolution_cache, &mut local_seen)?; + // SAFETY: clears all '_ refs before narrowing back to 'static to reclaim the buffer. + local_seen_buf = unsafe { reuse_local_seen(local_seen) }; + } if !state.external.is_empty() { - // Group external refs by fragmentless URI to avoid fetching the same resource multiple times. - // Multiple refs may point to the same base URL with different fragments (e.g., #/$defs/foo and #/$defs/bar). - // We need to fetch each unique base URL only once, then handle all fragment refs against it. 
let mut grouped = ExternalRefsByBase::new(); for (original, uri, kind) in state.external.drain() { let mut fragmentless = uri.clone(); fragmentless.set_fragment(None); - if !resources.contains_key(&fragmentless) { + if !known_resources.contains(&fragmentless) { grouped .entry(fragmentless) .or_default() @@ -927,7 +1651,6 @@ async fn process_resources_async( } } - // Fetch each unique fragmentless URI once let entries: Vec<_> = grouped.into_iter().collect(); let results = { let futures = entries @@ -940,7 +1663,6 @@ async fn process_resources_async( let retrieved = match result { Ok(retrieved) => retrieved, Err(error) => { - // Report error for the first ref that caused this fetch if let Some((original, uri, kind)) = refs.into_iter().next() { handle_retrieve_error(&uri, &original, &fragmentless, error, kind)?; } @@ -948,35 +1670,49 @@ async fn process_resources_async( } }; - let (key, resource) = create_resource( + let (key, draft) = create_resource( retrieved, fragmentless, default_draft, documents, - resources, + known_resources, + &mut state.index_data, &mut state.custom_metaschemas, ); - // Handle all fragment refs that pointed to this base URL for (_, uri, _) in &refs { - handle_fragment( - uri, - &resource, - &key, - default_draft, - &mut state.queue, - Arc::clone(&key), - ); + enqueue_fragment_entry(uri, &key, default_draft, documents, &mut state.queue); } - state.queue.push_back((key, resource, None)); + state + .queue + .push_back((Arc::clone(&key), key, String::new(), draft)); } } } - handle_metaschemas(state.refers_metaschemas, resources, anchors, default_draft); + handle_metaschemas( + state.refers_metaschemas, + documents, + known_resources, + default_draft, + state, + )?; + + if !state.queue.is_empty() { + // SAFETY: widens 'static → '_ (covariant); set is empty after reuse_local_seen clears it. 
+ let mut local_seen: LocalSeen<'_> = unsafe { reuse_local_seen(local_seen_buf) }; + process_queue( + state, + documents, + known_resources, + resolution_cache, + &mut local_seen, + )?; + process_deferred_refs(state, documents, resolution_cache, &mut local_seen)?; + } - Ok(state.custom_metaschemas) + Ok(()) } fn handle_retrieve_error( @@ -987,13 +1723,8 @@ fn handle_retrieve_error( kind: ReferenceKind, ) -> Result<(), Error> { match kind { - ReferenceKind::Schema => { - // $schema fetch failures are non-fatal during resource processing - // Unregistered custom meta-schemas will be caught in validate_custom_metaschemas() - Ok(()) - } + ReferenceKind::Schema => Ok(()), ReferenceKind::Ref => { - // $ref fetch failures are fatal - they're required for validation if uri.scheme().as_str() == "json-schema" { Err(Error::unretrievable( original, @@ -1007,58 +1738,45 @@ fn handle_retrieve_error( } fn validate_custom_metaschemas( - custom_metaschemas: &[Arc>], - resources: &ResourceMap, + custom_metaschemas: &[String], + known_resources: &KnownResources, ) -> Result<(), Error> { - // Only validate resources with Draft::Unknown - for uri in custom_metaschemas { - if let Some(resource) = resources.get(uri) { - // Extract the $schema value from this resource - if let Some(schema_uri) = resource - .contents() - .as_object() - .and_then(|obj| obj.get("$schema")) - .and_then(|s| s.as_str()) - { - // Check if this meta-schema is registered - match uri::from_str(schema_uri) { - Ok(mut meta_uri) => { - // Remove fragment for lookup (e.g., "http://example.com/schema#" -> "http://example.com/schema") - meta_uri.set_fragment(None); - if !resources.contains_key(&meta_uri) { - return Err(Error::unknown_specification(schema_uri)); - } - } - Err(_) => { - return Err(Error::unknown_specification(schema_uri)); - } + for schema_uri in custom_metaschemas { + match uri::from_str(schema_uri) { + Ok(mut meta_uri) => { + meta_uri.set_fragment(None); + if !known_resources.contains(&meta_uri) { + 
return Err(Error::unknown_specification(schema_uri)); } } + Err(_) => { + return Err(Error::unknown_specification(schema_uri)); + } } } Ok(()) } -fn collect_external_resources( +fn collect_external_resources<'doc>( base: &Arc>, - root: &Value, - contents: &Value, + root: &'doc Value, + contents: &'doc Value, collected: &mut AHashSet<(String, Uri, ReferenceKind)>, seen: &mut ReferenceTracker, resolution_cache: &mut UriCache, scratch: &mut String, refers_metaschemas: &mut bool, draft: Draft, - visited: &mut AHashSet, + doc_key: &Arc>, + deferred_refs: &mut Vec, + local_seen: &mut LocalSeen<'doc>, ) -> Result<(), Error> { - // URN schemes are not supported for external resolution if base.scheme().as_str() == "urn" { return Ok(()); } macro_rules! on_reference { ($reference:expr, $key:literal) => { - // Skip well-known schema references if $reference.starts_with("https://json-schema.org/draft/") || $reference.starts_with("http://json-schema.org/draft-") || base.as_str().starts_with("https://json-schema.org/draft/") @@ -1067,12 +1785,8 @@ fn collect_external_resources( *refers_metaschemas = true; } } else if $reference != "#" { - if mark_reference(seen, base, $reference) { - // Handle local references separately as they may have nested references to external resources - if $reference.starts_with('#') { - // Use the root document for pointer resolution since local refs are always - // relative to the document root, not the current subschema. - // Also track $id changes along the path to get the correct base URI. + if $reference.starts_with('#') { + if mark_local_reference(local_seen, base, $reference) { if let Some((referenced, resolved_base)) = pointer_with_base( root, $reference.trim_start_matches('#'), @@ -1080,61 +1794,49 @@ fn collect_external_resources( resolution_cache, draft, )? 
{ - // Recursively collect from the referenced schema and all its subresources - collect_external_resources_recursive( - &resolved_base, - root, - referenced, - collected, - seen, - resolution_cache, - scratch, - refers_metaschemas, - draft, - visited, - )?; + let target_draft = draft.detect(referenced); + deferred_refs.push(( + resolved_base, + Arc::clone(doc_key), + $reference.trim_start_matches('#').to_string(), + target_draft, + )); } - } else { - let resolved = if base.has_fragment() { - let mut base_without_fragment = base.as_ref().clone(); - base_without_fragment.set_fragment(None); - - let (path, fragment) = match $reference.split_once('#') { - Some((path, fragment)) => (path, Some(fragment)), - None => ($reference, None), - }; - - let mut resolved = (*resolution_cache - .resolve_against(&base_without_fragment.borrow(), path)?) - .clone(); - // Add the fragment back if present - if let Some(fragment) = fragment { - // It is cheaper to check if it is properly encoded than allocate given that - // the majority of inputs do not need to be additionally encoded - if let Some(encoded) = uri::EncodedString::new(fragment) { - resolved = resolved.with_fragment(Some(encoded)); - } else { - uri::encode_to(fragment, scratch); - resolved = resolved.with_fragment(Some( - uri::EncodedString::new_or_panic(scratch), - )); - scratch.clear(); - } - } - resolved - } else { - (*resolution_cache - .resolve_against(&base.borrow(), $reference)?) 
- .clone() + } + } else if mark_reference(seen, base, $reference) { + let resolved = if base.has_fragment() { + let mut base_without_fragment = base.as_ref().clone(); + base_without_fragment.set_fragment(None); + + let (path, fragment) = match $reference.split_once('#') { + Some((path, fragment)) => (path, Some(fragment)), + None => ($reference, None), }; - let kind = if $key == "$schema" { - ReferenceKind::Schema - } else { - ReferenceKind::Ref - }; - collected.insert(($reference.to_string(), resolved, kind)); - } + let mut resolved = (*resolution_cache + .resolve_against(&base_without_fragment.borrow(), path)?) + .clone(); + if let Some(fragment) = fragment { + if let Some(encoded) = uri::EncodedString::new(fragment) { + resolved = resolved.with_fragment(Some(encoded)); + } else { + uri::encode_to(fragment, scratch); + resolved = resolved + .with_fragment(Some(uri::EncodedString::new_or_panic(scratch))); + scratch.clear(); + } + } + resolved + } else { + (*resolution_cache.resolve_against(&base.borrow(), $reference)?).clone() + }; + + let kind = if $key == "$schema" { + ReferenceKind::Schema + } else { + ReferenceKind::Ref + }; + collected.insert(($reference.to_string(), resolved, kind)); } } }; @@ -1165,14 +1867,10 @@ fn collect_external_resources( Ok(()) } -/// Recursively collect external resources from a schema and all its subresources. -/// -/// The `visited` set tracks schema pointers we've already processed to avoid infinite -/// recursion when schemas reference each other (directly or through subresources). 
-fn collect_external_resources_recursive( +fn collect_external_resources_recursive<'doc>( base: &Arc>, - root: &Value, - contents: &Value, + root: &'doc Value, + contents: &'doc Value, collected: &mut AHashSet<(String, Uri, ReferenceKind)>, seen: &mut ReferenceTracker, resolution_cache: &mut UriCache, @@ -1180,8 +1878,10 @@ fn collect_external_resources_recursive( refers_metaschemas: &mut bool, draft: Draft, visited: &mut AHashSet, + doc_key: &Arc>, + deferred_refs: &mut Vec, + local_seen: &mut LocalSeen<'doc>, ) -> Result<(), Error> { - // Track by pointer address to avoid processing the same schema twice let ptr = std::ptr::from_ref::(contents) as usize; if !visited.insert(ptr) { return Ok(()); @@ -1192,7 +1892,6 @@ fn collect_external_resources_recursive( None => Arc::clone(base), }; - // First, collect from the current schema collect_external_resources( ¤t_base, root, @@ -1203,10 +1902,11 @@ fn collect_external_resources_recursive( scratch, refers_metaschemas, draft, - visited, + doc_key, + deferred_refs, + local_seen, )?; - // Then recursively process all subresources for subresource in draft.subresources_of(contents) { let subresource_draft = draft.detect(subresource); collect_external_resources_recursive( @@ -1220,19 +1920,74 @@ fn collect_external_resources_recursive( refers_metaschemas, subresource_draft, visited, + doc_key, + deferred_refs, + local_seen, )?; } Ok(()) } +/// Process deferred local-ref targets collected during the main traversal. +/// +/// Called after `process_queue` finishes so that all subresource nodes are already in +/// `visited_schemas`. Subresource targets return in O(1); non-subresource targets +/// (e.g. `#/components/schemas/Foo`) are still fully traversed. New deferred entries +/// added during traversal are also processed iteratively until none remain. 
+fn process_deferred_refs<'a>( + state: &mut ProcessingState<'_>, + documents: &'a DocumentStore<'a>, + resolution_cache: &mut UriCache, + local_seen: &mut LocalSeen<'a>, +) -> Result<(), Error> { + while !state.deferred_refs.is_empty() { + let batch = std::mem::take(&mut state.deferred_refs); + for (base, doc_key, pointer_path, draft) in batch { + let Some(document) = documents.get(&doc_key) else { + continue; + }; + let root = document.contents(); + let Some(contents) = (if pointer_path.is_empty() { + Some(root) + } else { + pointer(root, &pointer_path) + }) else { + continue; + }; + collect_external_resources_recursive( + &base, + root, + contents, + &mut state.external, + &mut state.seen, + resolution_cache, + &mut state.scratch, + &mut state.refers_metaschemas, + draft, + &mut state.visited_schemas, + &doc_key, + &mut state.deferred_refs, + local_seen, + )?; + } + } + Ok(()) +} + fn mark_reference(seen: &mut ReferenceTracker, base: &Arc>, reference: &str) -> bool { seen.insert(ReferenceKey::new(base, reference)) } -/// Resolve an `$id` against a base URI, handling anchor-style IDs and empty fragments. -/// -/// Anchor-style `$id` values (starting with `#`) don't change the base URI. -/// Empty fragments are stripped from the resolved URI. +fn mark_local_reference<'a>( + local_seen: &mut LocalSeen<'a>, + base: &Arc>, + reference: &'a str, +) -> bool { + let base_ptr = + NonZeroUsize::new(Arc::as_ptr(base) as usize).expect("Arc pointer should never be null"); + local_seen.insert((base_ptr, reference)) +} + fn resolve_id( base: &Arc>, id: &str, @@ -1268,11 +2023,6 @@ pub fn pointer<'a>(document: &'a Value, pointer: &str) -> Option<&'a Value> { ) } -/// Look up a value by a JSON Pointer, tracking `$id` changes along the path. -/// -/// Returns both the resolved value and the accumulated base URI after processing -/// any `$id` declarations encountered along the path. Note that anchor-style `$id` -/// values (starting with `#`) don't change the base URI. 
#[allow(clippy::type_complexity)] fn pointer_with_base<'a>( document: &'a Value, @@ -1293,7 +2043,6 @@ fn pointer_with_base<'a>( let mut current_draft = draft; for token in pointer.split('/').skip(1).map(unescape_segment) { - // Check for $id in the current value before traversing deeper current_draft = current_draft.detect(current); if let Some(id) = current_draft.id_of(current) { current_base = resolve_id(¤t_base, id, resolution_cache)?; @@ -1312,8 +2061,6 @@ fn pointer_with_base<'a>( }; } - // Note: We don't check $id in the final value here because - // `collect_external_resources_recursive` will handle it Ok(Some((current, current_base))) } @@ -1325,7 +2072,6 @@ pub fn parse_index(s: &str) -> Option { } s.parse().ok() } - #[cfg(test)] mod tests { use std::error::Error as _; @@ -1335,9 +2081,9 @@ mod tests { use serde_json::{json, Value}; use test_case::test_case; - use crate::{uri::from_str, Draft, Registry, Resource, Retrieve}; + use crate::{resource::PathStack, uri::from_str, Anchor, Draft, Registry, Resource, Retrieve}; - use super::{pointer, RegistryOptions, SPECIFICATIONS}; + use super::{pointer, ParsedPointer, SPECIFICATIONS}; #[test] fn test_empty_pointer() { @@ -1345,10 +2091,30 @@ mod tests { assert_eq!(pointer(&document, ""), Some(&document)); } + #[test] + fn test_parsed_pointer_from_path_stack_matches_pointer_lookup() { + let document = json!({ + "$defs": { + "foo/bar": [ + {"value": true} + ] + } + }); + let mut path = PathStack::from_base("/$defs".to_string()); + path.push_key("foo/bar"); + path.push_index(0); + + let parsed = ParsedPointer::from_path_stack(&path).expect("Pointer should parse"); + assert_eq!( + parsed.lookup(&document), + pointer(&document, "/$defs/foo~1bar/0") + ); + } + #[test] fn test_invalid_uri_on_registry_creation() { let schema = Draft::Draft202012.create_resource(json!({})); - let result = Registry::try_new(":/example.com", schema); + let result = Registry::new().add(":/example.com", schema); let error = 
result.expect_err("Should fail"); assert_eq!( @@ -1369,13 +2135,16 @@ mod tests { "foo": { "type": "string" } } })); - let registry = - Registry::try_new("http://example.com/schema1", schema).expect("Invalid resources"); + let registry = Registry::new() + .add("http://example.com/schema1", schema) + .expect("Invalid resources") + .prepare() + .expect("Invalid resources"); // Attempt to create a resolver for a URL not in the registry - let resolver = registry - .try_resolver("http://example.com/non_existent_schema") - .expect("Invalid base URI"); + let resolver = registry.resolver( + from_str("http://example.com/non_existent_schema").expect("Invalid base URI"), + ); let result = resolver.lookup(""); @@ -1385,20 +2154,161 @@ mod tests { ); } + #[test] + fn test_registry_can_be_built_from_borrowed_resources() { + let schema = json!({"type": "string"}); + let registry = Registry::new() + .add("urn:root", &schema) + .expect("Invalid resources") + .prepare() + .expect("Invalid resources"); + assert!(registry.contains_resource_uri("urn:root")); + } + + #[test] + fn test_prepare_builds_local_entries_for_borrowed_and_owned() { + let root = json!({"$ref": "http://example.com/remote"}); + let remote = json!({"type": "string"}); + let registry = Registry::new() + .retriever(create_test_retriever(&[( + "http://example.com/remote", + remote.clone(), + )])) + .add("http://example.com/root", &root) + .expect("Invalid resources") + .prepare() + .expect("Invalid resources"); + + let root_uri = from_str("http://example.com/root").expect("Invalid root URI"); + let remote_uri = from_str("http://example.com/remote").expect("Invalid remote URI"); + + let root_resource = registry + .resource_by_uri(&root_uri) + .expect("Borrowed root should be available from prepared local entries"); + let remote_resource = registry + .resource_by_uri(&remote_uri) + .expect("Owned retrieved document should be available from prepared local entries"); + + assert_eq!(root_resource.contents(), &root); + 
assert_eq!(remote_resource.contents(), &remote); + } + + #[test] + fn test_prepare_populates_local_entries_for_subresources_and_anchors() { + let registry = Registry::new() + .add( + "http://example.com/root", + json!({ + "$defs": { + "embedded": { + "$id": "http://example.com/embedded", + "$anchor": "node", + "type": "string" + } + } + }), + ) + .expect("Invalid resources") + .prepare() + .expect("Invalid resources"); + + let embedded_uri = from_str("http://example.com/embedded").expect("Invalid embedded URI"); + let embedded_resource = registry + .resource_by_uri(&embedded_uri) + .expect("Embedded subresource should be available from prepared local entries"); + assert_eq!( + embedded_resource.contents(), + &json!({ + "$id": "http://example.com/embedded", + "$anchor": "node", + "type": "string" + }) + ); + + let embedded_anchor = registry + .anchor(&embedded_uri, "node") + .expect("Embedded anchor should be available from prepared local entries"); + match embedded_anchor { + Anchor::Default { resource, .. } => assert_eq!( + resource.contents(), + &json!({ + "$id": "http://example.com/embedded", + "$anchor": "node", + "type": "string" + }) + ), + Anchor::Dynamic { .. 
} => panic!("Expected a default anchor"), + } + } + + #[test] + fn test_prepare_merges_anchor_entries_for_shared_effective_uri() { + let registry = Registry::new() + .add( + "http://example.com/root", + json!({ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$defs": { + "first": { + "$anchor": "first", + "type": "string" + }, + "second": { + "$anchor": "second", + "type": "integer" + } + } + }), + ) + .expect("Invalid resources") + .prepare() + .expect("Invalid resources"); + + let resolver = registry.resolver(from_str("http://example.com/root").expect("Invalid URI")); + + assert_eq!( + resolver + .lookup("#first") + .expect("First anchor should resolve") + .contents(), + &json!({ + "$anchor": "first", + "type": "string" + }) + ); + assert_eq!( + resolver + .lookup("#second") + .expect("Second anchor should resolve") + .contents(), + &json!({ + "$anchor": "second", + "type": "integer" + }) + ); + } + #[test] fn test_relative_uri_without_base() { let schema = Draft::Draft202012.create_resource(json!({"$ref": "./virtualNetwork.json"})); - let error = Registry::try_new("json-schema:///", schema).expect_err("Should fail"); + let error = Registry::new() + .add("json-schema:///", schema) + .expect("Root resource should be accepted") + .prepare() + .expect_err("Should fail"); assert_eq!(error.to_string(), "Resource './virtualNetwork.json' is not present in a registry and retrieving it failed: No base URI is available"); } #[test] - fn test_try_with_resources_requires_registered_custom_meta_schema() { - let base_registry = Registry::try_new( - "http://example.com/root", - Resource::from_contents(json!({"type": "object"})), - ) - .expect("Base registry should be created"); + fn test_prepare_requires_registered_custom_meta_schema() { + let base_registry = Registry::new() + .add( + "http://example.com/root", + Resource::from_contents(json!({"type": "object"})), + ) + .expect("Base registry should be created") + .prepare() + .expect("Base registry should be 
created"); let custom_schema = Resource::from_contents(json!({ "$id": "http://example.com/custom", @@ -1407,10 +2317,9 @@ mod tests { })); let error = base_registry - .try_with_resources( - [("http://example.com/custom", custom_schema)], - Draft::default(), - ) + .add("http://example.com/custom", custom_schema) + .expect("Schema should be accepted") + .prepare() .expect_err("Extending registry must fail when the custom $schema is not registered"); let error_msg = error.to_string(); @@ -1421,14 +2330,17 @@ mod tests { } #[test] - fn test_try_with_resources_accepts_registered_custom_meta_schema_fragment() { + fn test_prepare_accepts_registered_custom_meta_schema_fragment() { let meta_schema = Resource::from_contents(json!({ "$id": "http://example.com/meta/custom#", "$schema": "https://json-schema.org/draft/2020-12/schema", "type": "object" })); - let registry = Registry::try_new("http://example.com/meta/custom#", meta_schema) + let registry = Registry::new() + .add("http://example.com/meta/custom#", meta_schema) + .expect("Meta-schema should be registered successfully") + .prepare() .expect("Meta-schema should be registered successfully"); let schema = Resource::from_contents(json!({ @@ -1438,11 +2350,9 @@ mod tests { })); registry - .clone() - .try_with_resources( - [("http://example.com/schemas/my-schema", schema)], - Draft::default(), - ) + .add("http://example.com/schemas/my-schema", schema) + .expect("Schema should be accepted") + .prepare() .expect("Schema should accept registered meta-schema URI with trailing '#'"); } @@ -1480,21 +2390,24 @@ mod tests { // Register all meta-schemas and schema in a chained manner // All resources are provided upfront, so no external retrieval should occur - Registry::try_from_resources([ - ( + Registry::new() + .add( "json-schema:///meta/level-b", Resource::from_contents(meta_schema_b), - ), - ( + ) + .expect("Meta-schema should be accepted") + .add( "json-schema:///meta/level-a", Resource::from_contents(meta_schema_a), - ), - 
( + ) + .expect("Meta-schema should be accepted") + .add( "json-schema:///schemas/my-schema", Resource::from_contents(schema), - ), - ]) - .expect("Chained custom meta-schemas should be accepted when all are registered"); + ) + .expect("Schema should be accepted") + .prepare() + .expect("Chained custom meta-schemas should be accepted when all are registered"); } struct TestRetriever { @@ -1529,6 +2442,40 @@ mod tests { ) } + #[test] + fn test_registry_builder_uses_custom_draft() { + let registry = Registry::new() + .draft(Draft::Draft4) + .add("urn:test", json!({})) + .expect("Resource should be accepted") + .prepare() + .expect("Registry should prepare"); + + let uri = from_str("urn:test").expect("Invalid test URI"); + assert_eq!( + registry.resource_by_uri(&uri).unwrap().draft(), + Draft::Draft4 + ); + } + + #[test] + fn test_registry_builder_uses_custom_retriever() { + let registry = Registry::new() + .retriever(create_test_retriever(&[( + "http://example.com/remote", + json!({"type": "string"}), + )])) + .add( + "http://example.com/root", + json!({"$ref": "http://example.com/remote"}), + ) + .expect("Resource should be accepted") + .prepare() + .expect("Registry should prepare"); + + assert!(registry.contains_resource_uri("http://example.com/remote")); + } + struct TestCase { input_resources: Vec<(&'static str, Value)>, remote_resources: Vec<(&'static str, Value)>, @@ -1712,39 +2659,95 @@ mod tests { .into_iter() .map(|(uri, value)| (uri, Resource::from_contents(value))); - let registry = Registry::options() - .retriever(retriever) - .build(input_pairs) - .expect("Invalid resources"); + let mut registry = Registry::new().retriever(retriever); + for (uri, resource) in input_pairs { + registry = registry.add(uri, resource).expect("Invalid resources"); + } + let registry = registry.prepare().expect("Invalid resources"); // Verify that all expected URIs are resolved and present in resources for uri in test_case.expected_resolved_uris { - let resolver = 
registry.try_resolver("").expect("Invalid base URI"); + let resolver = registry.resolver(from_str("").expect("Invalid base URI")); assert!(resolver.lookup(uri).is_ok()); } } #[test] fn test_default_retriever_with_remote_refs() { - let result = Registry::try_from_resources([( - "http://example.com/schema1", - Resource::from_contents(json!({"$ref": "http://example.com/schema2"})), - )]); + let result = Registry::new() + .add( + "http://example.com/schema1", + Resource::from_contents(json!({"$ref": "http://example.com/schema2"})), + ) + .expect("Resource should be accepted") + .prepare(); let error = result.expect_err("Should fail"); assert_eq!(error.to_string(), "Resource 'http://example.com/schema2' is not present in a registry and retrieving it failed: Default retriever does not fetch resources"); assert!(error.source().is_some()); } #[test] - fn test_options() { - let _registry = RegistryOptions::default() - .build([("", Resource::from_contents(json!({})))]) - .expect("Invalid resources"); + fn test_registry_new_can_add_and_prepare() { + let registry = Registry::new() + .add("urn:test", json!({"type": "string"})) + .expect("Resource should be accepted") + .prepare() + .expect("Registry should prepare"); + + assert!(registry.contains_resource_uri("urn:test")); + } + + #[test] + fn test_prepared_registry_can_be_extended_via_add() { + let original = Registry::new() + .add("urn:one", json!({"type": "string"})) + .expect("Resource should be accepted") + .prepare() + .expect("Registry should prepare"); + + let registry = original + .add("urn:two", json!({"type": "integer"})) + .expect("Resource should be accepted") + .prepare() + .expect("Registry should prepare"); + + assert!(original.contains_resource_uri("urn:one")); + assert!(!original.contains_resource_uri("urn:two")); + assert!(registry.contains_resource_uri("urn:one")); + assert!(registry.contains_resource_uri("urn:two")); + } + + #[test] + fn test_registry_builder_accepts_borrowed_values() { + let schema = 
json!({"type": "string"}); + let registry = Registry::new() + .add("urn:test", &schema) + .expect("Resource should be accepted") + .prepare() + .expect("Registry should prepare"); + + assert!(registry.contains_resource_uri("urn:test")); + } + + #[test] + fn test_registry_builder_accepts_borrowed_resources() { + let schema = Draft::Draft4.create_resource(json!({"type": "string"})); + let registry = Registry::new() + .add("urn:test", &schema) + .expect("Resource should be accepted") + .prepare() + .expect("Registry should prepare"); + + let uri = from_str("urn:test").expect("Invalid test URI"); + assert_eq!( + registry.resource_by_uri(&uri).unwrap().draft(), + Draft::Draft4 + ); } #[test] fn test_registry_with_duplicate_input_uris() { - let input_resources = vec![ - ( + let registry = Registry::new() + .add( "http://example.com/schema", json!({ "type": "object", @@ -1752,8 +2755,9 @@ mod tests { "foo": { "type": "string" } } }), - ), - ( + ) + .expect("First resource should be accepted") + .add( "http://example.com/schema", json!({ "type": "object", @@ -1761,25 +2765,13 @@ mod tests { "bar": { "type": "number" } } }), - ), - ]; - - let result = Registry::try_from_resources( - input_resources - .into_iter() - .map(|(uri, value)| (uri, Draft::Draft202012.create_resource(value))), - ); - - assert!( - result.is_ok(), - "Failed to create registry with duplicate input URIs" - ); - let registry = result.unwrap(); + ) + .expect("Second resource should overwrite the first") + .prepare() + .expect("Registry should prepare"); - let resource = registry - .resources - .get(&from_str("http://example.com/schema").expect("Invalid URI")) - .unwrap(); + let uri = from_str("http://example.com/schema").expect("Invalid schema URI"); + let resource = registry.resource_by_uri(&uri).unwrap(); let properties = resource .contents() .get("properties") @@ -1787,24 +2779,21 @@ mod tests { .unwrap(); assert!( - !properties.contains_key("bar"), - "Registry should contain the earliest added schema" 
- ); - assert!( - properties.contains_key("foo"), - "Registry should contain the overwritten schema" + !properties.contains_key("foo"), + "Registry should replace the earlier explicit input resource" ); + assert!(properties.contains_key("bar")); } #[test] fn test_resolver_debug() { let registry = SPECIFICATIONS - .clone() - .try_with_resource("http://example.com", Resource::from_contents(json!({}))) + .add("http://example.com", json!({})) + .expect("Invalid resource") + .prepare() .expect("Invalid resource"); - let resolver = registry - .try_resolver("http://127.0.0.1/schema") - .expect("Invalid base URI"); + let resolver = + registry.resolver(from_str("http://127.0.0.1/schema").expect("Invalid base URI")); assert_eq!( format!("{resolver:?}"), "Resolver { base_uri: \"http://127.0.0.1/schema\", scopes: \"[]\" }" @@ -1812,12 +2801,13 @@ mod tests { } #[test] - fn test_try_with_resource() { + fn test_prepare_with_specifications_registry() { let registry = SPECIFICATIONS - .clone() - .try_with_resource("http://example.com", Resource::from_contents(json!({}))) + .add("http://example.com", json!({})) + .expect("Invalid resource") + .prepare() .expect("Invalid resource"); - let resolver = registry.try_resolver("").expect("Invalid base URI"); + let resolver = registry.resolver(from_str("").expect("Invalid base URI")); let resolved = resolver .lookup("http://json-schema.org/draft-06/schema#/definitions/schemaArray") .expect("Lookup failed"); @@ -1831,10 +2821,72 @@ mod tests { ); } + #[test] + fn test_prepare_preserves_existing_local_entries() { + let original = Registry::new() + .add( + "http://example.com/root", + Resource::from_contents(json!({ + "$defs": { + "embedded": { + "$id": "http://example.com/embedded", + "type": "string" + } + } + })), + ) + .expect("Invalid root schema") + .prepare() + .expect("Invalid root schema"); + + let extended = original + .add( + "http://example.com/other", + Resource::from_contents(json!({"type": "number"})), + ) + .expect("Registry 
extension should succeed") + .prepare() + .expect("Registry extension should succeed"); + + let resolver = extended.resolver(from_str("").expect("Invalid base URI")); + let embedded = resolver + .lookup("http://example.com/embedded") + .expect("Embedded subresource URI should stay indexed after extension"); + assert_eq!( + embedded.contents(), + &json!({ + "$id": "http://example.com/embedded", + "type": "string" + }) + ); + } + + #[test] + fn test_prepared_registry_can_be_extended_via_extend() { + let original = Registry::new() + .add("urn:one", json!({"type": "string"})) + .expect("Resource should be accepted") + .prepare() + .expect("Registry should prepare"); + + let registry = original + .extend([("urn:two", json!({"type": "integer"}))]) + .expect("Resources should be accepted") + .prepare() + .expect("Registry should prepare"); + + assert!(original.contains_resource_uri("urn:one")); + assert!(!original.contains_resource_uri("urn:two")); + assert!(registry.contains_resource_uri("urn:one")); + assert!(registry.contains_resource_uri("urn:two")); + } + #[test] fn test_invalid_reference() { let resource = Draft::Draft202012.create_resource(json!({"$schema": "$##"})); - let _ = Registry::try_new("http://#/", resource); + let _ = Registry::new() + .add("http://#/", resource) + .and_then(super::RegistryBuilder::prepare); } } @@ -1876,12 +2928,14 @@ mod async_tests { #[tokio::test] async fn test_default_async_retriever_with_remote_refs() { - let result = Registry::options() + let result = Registry::new() .async_retriever(DefaultRetriever) - .build([( + .add( "http://example.com/schema1", Resource::from_contents(json!({"$ref": "http://example.com/schema2"})), - )]) + ) + .expect("Resource should be accepted") + .async_prepare() .await; let error = result.expect_err("Should fail"); @@ -1890,18 +2944,21 @@ mod async_tests { } #[tokio::test] - async fn test_async_options() { - let _registry = Registry::options() + async fn test_async_prepare() { + let _registry = 
Registry::new() .async_retriever(DefaultRetriever) - .build([("", Draft::default().create_resource(json!({})))]) + .add("", Draft::default().create_resource(json!({}))) + .expect("Invalid resources") + .async_prepare() .await .expect("Invalid resources"); } #[tokio::test] async fn test_async_registry_with_duplicate_input_uris() { - let input_resources = vec![ - ( + let registry = Registry::new() + .async_retriever(DefaultRetriever) + .add( "http://example.com/schema", json!({ "type": "object", @@ -1909,8 +2966,9 @@ mod async_tests { "foo": { "type": "string" } } }), - ), - ( + ) + .expect("First resource should be accepted") + .add( "http://example.com/schema", json!({ "type": "object", @@ -1918,28 +2976,14 @@ mod async_tests { "bar": { "type": "number" } } }), - ), - ]; - - let result = Registry::options() - .async_retriever(DefaultRetriever) - .build( - input_resources - .into_iter() - .map(|(uri, value)| (uri, Draft::Draft202012.create_resource(value))), ) - .await; - - assert!( - result.is_ok(), - "Failed to create registry with duplicate input URIs" - ); - let registry = result.unwrap(); + .expect("Second resource should overwrite the first") + .async_prepare() + .await + .expect("Registry should prepare"); - let resource = registry - .resources - .get(&uri::from_str("http://example.com/schema").expect("Invalid URI")) - .unwrap(); + let uri = uri::from_str("http://example.com/schema").expect("Invalid schema URI"); + let resource = registry.resource_by_uri(&uri).unwrap(); let properties = resource .contents() .get("properties") @@ -1947,38 +2991,103 @@ mod async_tests { .unwrap(); assert!( - !properties.contains_key("bar"), - "Registry should contain the earliest added schema" - ); - assert!( - properties.contains_key("foo"), - "Registry should contain the overwritten schema" + !properties.contains_key("foo"), + "Registry should replace the earlier explicit input resource" ); + assert!(properties.contains_key("bar")); + } + + #[tokio::test] + async fn 
test_registry_builder_async_prepare_uses_async_retriever() { + let registry = Registry::new() + .async_retriever(TestAsyncRetriever::with_schema( + "http://example.com/schema2", + json!({"type": "object"}), + )) + .add( + "http://example.com", + json!({"$ref": "http://example.com/schema2"}), + ) + .expect("Resource should be accepted") + .async_prepare() + .await + .expect("Registry should prepare"); + + let resolver = registry.resolver(uri::from_str("").expect("Invalid base URI")); + let resolved = resolver + .lookup("http://example.com/schema2") + .expect("Lookup failed"); + assert_eq!(resolved.contents(), &json!({"type": "object"})); } #[tokio::test] - async fn test_async_try_with_resource() { + async fn test_async_prepare_with_remote_resource() { let retriever = TestAsyncRetriever::with_schema( "http://example.com/schema2", json!({"type": "object"}), ); - let registry = Registry::options() + let registry = Registry::new() .async_retriever(retriever) - .build([( + .add( "http://example.com", Resource::from_contents(json!({"$ref": "http://example.com/schema2"})), - )]) + ) + .expect("Invalid resource") + .async_prepare() .await .expect("Invalid resource"); - let resolver = registry.try_resolver("").expect("Invalid base URI"); + let resolver = registry.resolver(uri::from_str("").expect("Invalid base URI")); let resolved = resolver .lookup("http://example.com/schema2") .expect("Lookup failed"); assert_eq!(resolved.contents(), &json!({"type": "object"})); } + #[tokio::test] + async fn test_async_prepare_preserves_existing_local_entries() { + let original = Registry::new() + .async_retriever(DefaultRetriever) + .add( + "http://example.com/root", + Resource::from_contents(json!({ + "$defs": { + "embedded": { + "$id": "http://example.com/embedded", + "type": "string" + } + } + })), + ) + .expect("Invalid root schema") + .async_prepare() + .await + .expect("Invalid root schema"); + + let extended = original + .add( + "http://example.com/other", + 
Resource::from_contents(json!({"type": "number"})), + ) + .expect("Registry extension should succeed") + .async_prepare() + .await + .expect("Registry extension should succeed"); + + let resolver = extended.resolver(uri::from_str("").expect("Invalid base URI")); + let embedded = resolver + .lookup("http://example.com/embedded") + .expect("Embedded subresource URI should stay indexed after async extension"); + assert_eq!( + embedded.contents(), + &json!({ + "$id": "http://example.com/embedded", + "type": "string" + }) + ); + } + #[tokio::test] async fn test_async_registry_with_multiple_refs() { let retriever = TestAsyncRetriever { @@ -1994,9 +3103,9 @@ mod async_tests { ]), }; - let registry = Registry::options() + let registry = Registry::new() .async_retriever(retriever) - .build([( + .add( "http://example.com/schema1", Resource::from_contents(json!({ "type": "object", @@ -2005,11 +3114,13 @@ mod async_tests { "str": {"$ref": "http://example.com/schema3"} } })), - )]) + ) + .expect("Invalid resource") + .async_prepare() .await .expect("Invalid resource"); - let resolver = registry.try_resolver("").expect("Invalid base URI"); + let resolver = registry.resolver(uri::from_str("").expect("Invalid base URI")); // Check both references are resolved correctly let resolved2 = resolver @@ -2047,9 +3158,9 @@ mod async_tests { ]), }; - let registry = Registry::options() + let registry = Registry::new() .async_retriever(retriever) - .build([( + .add( "http://example.com/person", Resource::from_contents(json!({ "type": "object", @@ -2058,11 +3169,13 @@ mod async_tests { "address": {"$ref": "http://example.com/address"} } })), - )]) + ) + .expect("Invalid resource") + .async_prepare() .await .expect("Invalid resource"); - let resolver = registry.try_resolver("").expect("Invalid base URI"); + let resolver = registry.resolver(uri::from_str("").expect("Invalid base URI")); // Verify nested reference resolution let resolved = resolver @@ -2120,9 +3233,9 @@ mod async_tests { }; // 
Schema references the same external URL with different fragments - let registry = Registry::options() + let registry = Registry::new() .async_retriever(retriever) - .build([( + .add( "http://example.com/main", Resource::from_contents(json!({ "type": "object", @@ -2131,7 +3244,9 @@ mod async_tests { "age": { "$ref": "http://example.com/external#/$defs/bar" } } })), - )]) + ) + .expect("Invalid resource") + .async_prepare() .await .expect("Invalid resource"); @@ -2142,9 +3257,8 @@ mod async_tests { "External schema should be fetched only once, but was fetched {fetches} times" ); - let resolver = registry - .try_resolver("http://example.com/main") - .expect("Invalid base URI"); + let resolver = + registry.resolver(uri::from_str("http://example.com/main").expect("Invalid base URI")); // Verify both fragment references resolve correctly let foo = resolver diff --git a/crates/jsonschema-referencing/src/resolver.rs b/crates/jsonschema-referencing/src/resolver.rs index e3d39ee1..8937c440 100644 --- a/crates/jsonschema-referencing/src/resolver.rs +++ b/crates/jsonschema-referencing/src/resolver.rs @@ -4,14 +4,14 @@ use std::sync::Arc; use fluent_uri::Uri; use serde_json::Value; -use crate::{list::List, resource::JsonSchemaResource, Draft, Error, Registry, ResourceRef}; +use crate::{list::List, Anchor, Draft, Error, Registry, ResourceRef, VocabularySet}; /// A reference resolver. /// /// Resolves references against the base URI and looks up the result in the registry. #[derive(Clone)] pub struct Resolver<'r> { - pub(crate) registry: &'r Registry, + pub(crate) registry: &'r Registry<'r>, base_uri: Arc>, scopes: List>, } @@ -46,7 +46,8 @@ impl fmt::Debug for Resolver<'_> { impl<'r> Resolver<'r> { /// Create a new `Resolver` with the given registry and base URI. 
- pub(crate) fn new(registry: &'r Registry, base_uri: Arc>) -> Self { + #[inline] + pub(crate) fn new(registry: &'r Registry<'r>, base_uri: Arc>) -> Self { Self { registry, base_uri, @@ -54,6 +55,7 @@ impl<'r> Resolver<'r> { } } #[must_use] + #[inline] pub fn base_uri(&self) -> Arc> { self.base_uri.clone() } @@ -77,7 +79,7 @@ impl<'r> Resolver<'r> { (uri, fragment) }; - let Some(retrieved) = self.registry.resources.get(&*uri) else { + let Some(retrieved) = self.registry.resource_by_uri(&uri) else { return Err(Error::unretrievable( uri.as_str(), "Retrieving external resources is not supported once the registry is populated" @@ -91,7 +93,7 @@ impl<'r> Resolver<'r> { } if !fragment.is_empty() { - let retrieved = self.registry.anchor(&uri, fragment)?; + let retrieved = self.lookup_anchor(&uri, fragment)?; let resolver = self.evolve(uri); return retrieved.resolve(resolver); } @@ -146,19 +148,23 @@ impl<'r> Resolver<'r> { Ok(resolved) } + + #[inline] + pub(crate) fn lookup_anchor<'a>( + &self, + uri: &'a Uri, + name: &'a str, + ) -> Result, Error> { + self.registry.anchor(uri, name) + } + /// Create a resolver for a subresource. /// /// # Errors /// /// Returns an error if the resource id cannot be resolved against the base URI of this resolver. 
+ #[inline] pub fn in_subresource(&self, subresource: ResourceRef<'_>) -> Result { - self.in_subresource_inner(&subresource) - } - - pub(crate) fn in_subresource_inner( - &self, - subresource: &impl JsonSchemaResource, - ) -> Result { if let Some(id) = subresource.id() { let base_uri = self.registry.resolve_against(&self.base_uri.borrow(), id)?; Ok(Resolver { @@ -171,9 +177,11 @@ impl<'r> Resolver<'r> { } } #[must_use] + #[inline] pub fn dynamic_scope(&self) -> List> { self.scopes.clone() } + #[inline] fn evolve(&self, base_uri: Arc>) -> Resolver<'r> { if !self.base_uri.as_str().is_empty() && (self.scopes.is_empty() || base_uri != self.base_uri) @@ -196,9 +204,15 @@ impl<'r> Resolver<'r> { /// # Errors /// /// If the reference is invalid. + #[inline] pub fn resolve_against(&self, base: &Uri<&str>, uri: &str) -> Result>, Error> { self.registry.resolve_against(base, uri) } + + #[must_use] + pub fn find_vocabularies(&self, draft: Draft, contents: &Value) -> VocabularySet { + self.registry.find_vocabularies(draft, contents) + } } /// A reference resolved to its contents by a [`Resolver`]. diff --git a/crates/jsonschema-referencing/src/resource.rs b/crates/jsonschema-referencing/src/resource.rs index becb8e7b..2777743c 100644 --- a/crates/jsonschema-referencing/src/resource.rs +++ b/crates/jsonschema-referencing/src/resource.rs @@ -1,19 +1,94 @@ -use std::{ - borrow::Cow, - sync::atomic::{AtomicPtr, Ordering}, -}; +use std::borrow::Cow; use serde_json::Value; -use crate::{Anchor, Draft, Error, Resolved, Resolver, Segments}; +use crate::{write_escaped_str, write_index, Draft, Error, Resolved, Resolver, Segments}; -pub(crate) trait JsonSchemaResource { - fn contents(&self) -> &Value; - fn draft(&self) -> Draft; - fn id(&self) -> Option<&str> { - self.draft() - .id_of(self.contents()) - .map(|id| id.trim_end_matches('#')) +/// A segment in a JSON Pointer path, stored lazily to avoid string allocation during traversal. 
+pub(crate) enum PathSegment<'a> { + Key(&'a str), + Index(usize), +} + +/// A lazy JSON Pointer path that avoids string building during schema traversal. +/// The path is only materialized to a `String` when actually needed (e.g., for skeleton entries). +pub(crate) struct PathStack<'a> { + /// Owned initial path from the queue entry (empty string for root resources). + base: String, + /// Dynamically accumulated segments as the traversal descends. + segments: Vec>, +} + +impl<'a> PathStack<'a> { + #[inline] + pub(crate) fn from_base(base: String) -> Self { + Self { + base, + segments: Vec::new(), + } + } + + /// Push a key segment. Returns a checkpoint to restore with `truncate`. + #[inline] + pub(crate) fn push_key(&mut self, key: &'a str) -> usize { + let checkpoint = self.segments.len(); + self.segments.push(PathSegment::Key(key)); + checkpoint + } + + /// Push a numeric index segment. Returns a checkpoint to restore with `truncate`. + #[inline] + pub(crate) fn push_index(&mut self, idx: usize) -> usize { + let checkpoint = self.segments.len(); + self.segments.push(PathSegment::Index(idx)); + checkpoint + } + + /// Restore the stack to the given checkpoint (removing segments added after it). + #[inline] + pub(crate) fn truncate(&mut self, checkpoint: usize) { + self.segments.truncate(checkpoint); + } + + #[inline] + pub(crate) fn base_pointer(&self) -> &str { + &self.base + } + + #[inline] + pub(crate) fn segments(&self) -> &[PathSegment<'a>] { + &self.segments + } + + /// Materialize the full JSON Pointer path as an owned `String`. + /// Only called when a skeleton entry is actually needed. 
+ pub(crate) fn to_pointer(&self) -> String { + if self.segments.is_empty() { + return self.base.clone(); + } + let extra = self + .segments + .iter() + .map(|segment| match segment { + PathSegment::Key(key) => 1 + key.len(), + PathSegment::Index(idx) => 1 + idx.checked_ilog10().unwrap_or(0) as usize + 1, + }) + .sum::(); + let mut s = String::with_capacity(self.base.len() + extra); + s.push_str(&self.base); + for seg in &self.segments { + match seg { + PathSegment::Key(k) => { + s.push('/'); + write_escaped_str(&mut s, k); + } + PathSegment::Index(i) => { + s.push('/'); + write_index(&mut s, *i); + } + } + } + s } } @@ -25,19 +100,23 @@ pub struct Resource { } impl Resource { + #[inline] pub(crate) fn new(contents: Value, draft: Draft) -> Self { Self { contents, draft } } + #[inline] pub(crate) fn into_inner(self) -> (Draft, Value) { (self.draft, self.contents) } /// Resource contents. #[must_use] + #[inline] pub fn contents(&self) -> &Value { &self.contents } /// JSON Schema draft under which this contents is interpreted. #[must_use] + #[inline] pub fn draft(&self) -> Draft { self.draft } @@ -45,6 +124,7 @@ impl Resource { /// /// Unknown `$schema` values are treated as `Draft::Unknown`. #[must_use] + #[inline] pub fn from_contents(contents: Value) -> Resource { Draft::default().detect(&contents).create_resource(contents) } @@ -59,14 +139,17 @@ pub struct ResourceRef<'a> { impl<'a> ResourceRef<'a> { #[must_use] + #[inline] pub fn new(contents: &'a Value, draft: Draft) -> Self { Self { contents, draft } } #[must_use] + #[inline] pub fn contents(&self) -> &'a Value { self.contents } #[must_use] + #[inline] pub fn draft(&self) -> Draft { self.draft } @@ -75,81 +158,29 @@ impl<'a> ResourceRef<'a> { /// /// Unknown `$schema` values are treated as `Draft::Unknown`. 
#[must_use] + #[inline] pub fn from_contents(contents: &'a Value) -> Self { let draft = Draft::default().detect(contents); Self::new(contents, draft) } #[must_use] + #[inline] pub fn id(&self) -> Option<&str> { - JsonSchemaResource::id(self) - } -} - -impl JsonSchemaResource for ResourceRef<'_> { - fn contents(&self) -> &Value { - self.contents - } - - fn draft(&self) -> Draft { self.draft + .id_of(self.contents) + .map(|id| id.trim_end_matches('#')) } } -/// A pointer to a pinned resource. -pub(crate) struct InnerResourcePtr { - contents: AtomicPtr, - draft: Draft, -} - -impl Clone for InnerResourcePtr { - fn clone(&self) -> Self { - Self { - contents: AtomicPtr::new(self.contents.load(Ordering::Relaxed)), - draft: self.draft, - } - } -} - -impl std::fmt::Debug for InnerResourcePtr { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_struct("InnerResourcePtr") - .field("contents", self.contents()) - .field("draft", &self.draft) - .finish() - } -} - -impl InnerResourcePtr { - pub(crate) fn new(contents: *const Value, draft: Draft) -> Self { - Self { - contents: AtomicPtr::new(contents.cast_mut()), - draft, - } - } - - #[allow(unsafe_code)] - pub(crate) fn contents(&self) -> &Value { - // SAFETY: The pointer is valid as long as the registry exists - unsafe { &*self.contents.load(Ordering::Relaxed) } - } - - #[inline] - pub(crate) fn draft(&self) -> Draft { - self.draft - } - - pub(crate) fn anchors(&self) -> impl Iterator + '_ { - self.draft().anchors(self.contents()) - } - - pub(crate) fn pointer<'r>( - &'r self, +impl<'r> ResourceRef<'r> { + pub(crate) fn pointer( + self, pointer: &str, mut resolver: Resolver<'r>, ) -> Result, Error> { // INVARIANT: Pointer always starts with `/` - let mut contents = self.contents(); + let mut contents = self.contents; let mut segments = Segments::new(); let original_pointer = pointer; let pointer = percent_encoding::percent_decode_str(&pointer[1..]) @@ -176,27 +207,17 @@ impl InnerResourcePtr { 
segments.push(segment); } let last = &resolver; - let new_resolver = self.draft().maybe_in_subresource( + let new_resolver = self.draft.maybe_in_subresource( &segments, &resolver, - &InnerResourcePtr::new(contents, self.draft()), + ResourceRef::new(contents, self.draft), )?; if new_resolver != *last { segments = Segments::new(); } resolver = new_resolver; } - Ok(Resolved::new(contents, resolver, self.draft())) - } -} - -impl JsonSchemaResource for InnerResourcePtr { - fn contents(&self) -> &Value { - self.contents() - } - - fn draft(&self) -> Draft { - self.draft + Ok(Resolved::new(contents, resolver, self.draft)) } } @@ -251,9 +272,9 @@ pub fn unescape_segment(mut segment: &str) -> Cow<'_, str> { #[cfg(test)] mod tests { - use std::{error::Error, sync::Arc}; + use std::error::Error; - use crate::{resource::InnerResourcePtr, Draft, Registry}; + use crate::{Draft, Registry}; use super::unescape_segment; use serde_json::json; @@ -292,7 +313,7 @@ mod tests { assert_eq!(unescaped, double_replaced, "Failed for: {input}"); } - fn create_test_registry() -> Registry { + fn create_test_registry() -> Registry<'static> { let schema = Draft::Draft202012.create_resource(json!({ "type": "object", "properties": { @@ -300,7 +321,11 @@ mod tests { "bar": { "type": "array", "items": [{"type": "number"}, {"type": "boolean"}] } } })); - Registry::try_new("http://example.com", schema).expect("Invalid resources") + Registry::new() + .add("http://example.com", schema) + .expect("Invalid resources") + .prepare() + .expect("Invalid resources") } #[test] @@ -311,38 +336,23 @@ mod tests { "foo": { "type": "string" } } })); - let registry = - Registry::try_new("http://example.com", schema.clone()).expect("Invalid resources"); + let registry = Registry::new() + .add("http://example.com", &schema) + .expect("Invalid resources") + .prepare() + .expect("Invalid resources"); let resolver = registry - .try_resolver("http://example.com") - .expect("Invalid base URI"); + 
.resolver(crate::uri::from_str("http://example.com").expect("Invalid base URI")); let resolved = resolver.lookup("#").expect("Lookup failed"); assert_eq!(resolved.contents(), schema.contents()); } - #[test] - fn test_inner_resource_ptr_debug() { - let value = Arc::pin(json!({ - "foo": "bar", - "number": 42 - })); - - let ptr = InnerResourcePtr::new(std::ptr::addr_of!(*value), Draft::Draft202012); - - let expected = format!( - "InnerResourcePtr {{ contents: {:?}, draft: Draft202012 }}", - *value - ); - assert_eq!(format!("{ptr:?}"), expected); - } - #[test] fn test_percent_encoded_non_utf8() { let registry = create_test_registry(); let resolver = registry - .try_resolver("http://example.com") - .expect("Invalid base URI"); + .resolver(crate::uri::from_str("http://example.com").expect("Invalid base URI")); let result = resolver.lookup("#/%FF"); let error = result.expect_err("Should fail"); @@ -357,8 +367,7 @@ mod tests { fn test_array_index_as_string() { let registry = create_test_registry(); let resolver = registry - .try_resolver("http://example.com") - .expect("Invalid base URI"); + .resolver(crate::uri::from_str("http://example.com").expect("Invalid base URI")); let result = resolver.lookup("#/properties/bar/items/one"); let error = result.expect_err("Should fail"); @@ -373,8 +382,7 @@ mod tests { fn test_array_index_out_of_bounds() { let registry = create_test_registry(); let resolver = registry - .try_resolver("http://example.com") - .expect("Invalid base URI"); + .resolver(crate::uri::from_str("http://example.com").expect("Invalid base URI")); let result = resolver.lookup("#/properties/bar/items/2"); assert_eq!( @@ -387,8 +395,7 @@ mod tests { fn test_unknown_property() { let registry = create_test_registry(); let resolver = registry - .try_resolver("http://example.com") - .expect("Invalid base URI"); + .resolver(crate::uri::from_str("http://example.com").expect("Invalid base URI")); let result = resolver.lookup("#/properties/baz"); assert_eq!( diff --git 
a/crates/jsonschema-referencing/src/small_map.rs b/crates/jsonschema-referencing/src/small_map.rs new file mode 100644 index 00000000..cde274f5 --- /dev/null +++ b/crates/jsonschema-referencing/src/small_map.rs @@ -0,0 +1,259 @@ +use std::mem; + +use ahash::AHashMap; + +pub(crate) enum SmallMap { + Small(micromap::Map), + Large(AHashMap), +} + +impl SmallMap { + #[inline] + pub(crate) fn new() -> Self { + SmallMap::Small(micromap::Map::new()) + } + + #[inline] + pub(crate) fn get(&self, key: &Q) -> Option<&V> + where + K: std::borrow::Borrow + Eq + std::hash::Hash, + Q: std::hash::Hash + Eq + ?Sized, + { + match self { + SmallMap::Small(map) => map.get(key), + SmallMap::Large(map) => map.get(key), + } + } + + #[inline] + pub(crate) fn insert(&mut self, key: K, value: V) + where + K: Eq + std::hash::Hash, + { + match self { + SmallMap::Small(map) => { + // Fits inline (new key with space) or overwrites existing key. + if map.len() < N || map.get(&key).is_some() { + map.insert(key, value); + return; + } + // Full and key is new — fall through to promotion. + } + SmallMap::Large(map) => { + map.insert(key, value); + return; + } + } + // Promotion: atomically swap self to Large, drain old Small into it. + let old = match mem::replace(self, SmallMap::Large(AHashMap::with_capacity(N + 1))) { + SmallMap::Small(m) => m, + SmallMap::Large(_) => unreachable!(), + }; + if let SmallMap::Large(new_map) = self { + for (k, v) in old { + new_map.insert(k, v); + } + new_map.insert(key, value); + } + } + + #[inline] + pub(crate) fn get_or_insert_default(&mut self, key: K) -> &mut V + where + K: Eq + std::hash::Hash, + V: Default, + { + // Determine whether we need to promote before borrowing map contents. + let needs_promotion = match self { + SmallMap::Small(map) => map.len() >= N && map.get(&key).is_none(), + SmallMap::Large(_) => false, + }; + if needs_promotion { + // Promotion (same pattern as insert). 
+ let old = match mem::replace(self, SmallMap::Large(AHashMap::with_capacity(N + 1))) { + SmallMap::Small(m) => m, + SmallMap::Large(_) => unreachable!(), + }; + if let SmallMap::Large(new_map) = self { + for (k, v) in old { + new_map.insert(k, v); + } + return new_map.entry(key).or_default(); + } + unreachable!() + } + match self { + SmallMap::Small(map) => map.entry(key).or_default(), + SmallMap::Large(map) => map.entry(key).or_default(), + } + } + + #[inline] + pub(crate) fn contains_key(&self, key: &Q) -> bool + where + K: std::borrow::Borrow + Eq + std::hash::Hash, + Q: std::hash::Hash + Eq + ?Sized, + { + self.get(key).is_some() + } +} + +impl Default for SmallMap { + fn default() -> Self { + Self::new() + } +} + +impl Clone for SmallMap { + fn clone(&self) -> Self { + match self { + SmallMap::Small(map) => SmallMap::Small(map.clone()), + SmallMap::Large(map) => SmallMap::Large(map.clone()), + } + } +} + +impl std::fmt::Debug for SmallMap { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + SmallMap::Small(map) => write!(f, "{map:?}"), + SmallMap::Large(map) => write!(f, "{map:?}"), + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_new_is_small() { + let map: SmallMap = SmallMap::new(); + assert!(matches!(map, SmallMap::Small(_))); + } + + #[test] + fn test_insert_and_get() { + let mut map: SmallMap = SmallMap::new(); + map.insert("key".to_string(), 42); + assert_eq!(map.get("key"), Some(&42)); + assert_eq!(map.get("missing"), None); + } + + #[test] + fn test_duplicate_key_overwrites() { + let mut map: SmallMap = SmallMap::new(); + map.insert("key".to_string(), 1); + map.insert("key".to_string(), 2); + assert_eq!(map.get("key"), Some(&2)); + // Verify no duplicate was added: inserting the same key again should still return 2 + map.insert("key".to_string(), 2); + assert_eq!(map.get("key"), Some(&2)); + } + + #[test] + fn test_multiple_inserts_stay_small() { + let mut map: SmallMap = 
SmallMap::new(); + for i in 0..4 { + map.insert(i, i * 10); + } + assert!(matches!(map, SmallMap::Small(_))); + for i in 0..4 { + assert_eq!(map.get(&i), Some(&(i * 10))); + } + } + + #[test] + fn test_promotion_at_n_plus_1() { + let mut map: SmallMap = SmallMap::new(); + for i in 0..5 { + map.insert(i, i * 10); + } + assert!(matches!(map, SmallMap::Large(_))); + for i in 0..5 { + assert_eq!(map.get(&i), Some(&(i * 10))); + } + } + + #[test] + fn test_contains_key_small() { + let mut map: SmallMap = SmallMap::new(); + map.insert("a".to_string(), 1); + assert!(map.contains_key("a")); + assert!(!map.contains_key("b")); + } + + #[test] + fn test_contains_key_large() { + let mut map: SmallMap = SmallMap::new(); + map.insert(1, 10); + map.insert(2, 20); + map.insert(3, 30); // triggers promotion + assert!(map.contains_key(&1)); + assert!(map.contains_key(&3)); + assert!(!map.contains_key(&99)); + } + + #[test] + fn test_get_or_insert_default_miss() { + let mut map: SmallMap> = SmallMap::new(); + map.get_or_insert_default("key".to_string()).push(1); + assert_eq!(map.get("key"), Some(&vec![1u32])); + } + + #[test] + fn test_get_or_insert_default_hit() { + let mut map: SmallMap = SmallMap::new(); + map.insert("key".to_string(), 42); + let v = map.get_or_insert_default("key".to_string()); + assert_eq!(*v, 42); + // Verify key was not duplicated: original value still accessible + assert_eq!(map.get("key"), Some(&42)); + } + + #[test] + fn test_get_or_insert_default_promotes() { + let mut map: SmallMap = SmallMap::new(); + map.insert(1, 10); + map.insert(2, 20); + // map is full; inserting new key via get_or_insert_default should promote + *map.get_or_insert_default(3) = 30; + assert!(matches!(map, SmallMap::Large(_))); + assert_eq!(map.get(&3), Some(&30)); + assert_eq!(map.get(&1), Some(&10)); + assert_eq!(map.get(&2), Some(&20)); + } + + #[test] + fn test_nested_map() { + let mut outer: SmallMap> = SmallMap::new(); + outer.get_or_insert_default(1).insert("a".to_string(), 
10); + outer.get_or_insert_default(1).insert("b".to_string(), 20); + outer.get_or_insert_default(2).insert("c".to_string(), 30); + assert_eq!(outer.get(&1).unwrap().get("a"), Some(&10)); + assert_eq!(outer.get(&1).unwrap().get("b"), Some(&20)); + assert_eq!(outer.get(&2).unwrap().get("c"), Some(&30)); + } + + #[test] + fn test_clone() { + let mut map: SmallMap = SmallMap::new(); + map.insert(1, 10); + map.insert(2, 20); + let cloned = map.clone(); + assert_eq!(cloned.get(&1), Some(&10)); + assert_eq!(cloned.get(&2), Some(&20)); + } + + #[test] + fn test_clone_large() { + let mut map: SmallMap = SmallMap::new(); + for i in 0..5 { + map.insert(i, i * 10); + } + let cloned = map.clone(); + for i in 0..5 { + assert_eq!(cloned.get(&i), Some(&(i * 10))); + } + } +} diff --git a/crates/jsonschema-referencing/src/specification/draft201909.rs b/crates/jsonschema-referencing/src/specification/draft201909.rs index 0cf2dac7..0e982121 100644 --- a/crates/jsonschema-referencing/src/specification/draft201909.rs +++ b/crates/jsonschema-referencing/src/specification/draft201909.rs @@ -1,8 +1,93 @@ use serde_json::Value; -use crate::{resource::InnerResourcePtr, segments::Segment, Error, Resolver, Segments}; +use crate::{resource::PathStack, specification::Draft, Error, Resolver, ResourceRef, Segments}; -use super::subresources::SubresourceIteratorInner; +use super::subresources::{self, SubresourceIteratorInner}; + +pub(crate) fn walk_subresources_with_path<'a, E, F>( + contents: &'a Value, + path: &mut PathStack<'a>, + draft: Draft, + f: &mut F, +) -> Result<(), E> +where + F: FnMut(&mut PathStack<'a>, &'a Value, Draft) -> Result<(), E>, +{ + let Some(schema) = contents.as_object() else { + return Ok(()); + }; + for (key, value) in schema { + match key.as_str() { + "additionalItems" + | "additionalProperties" + | "contains" + | "contentSchema" + | "else" + | "if" + | "not" + | "propertyNames" + | "then" + | "unevaluatedItems" + | "unevaluatedProperties" => { + let c = 
path.push_key(key); + f(path, value, draft.detect(value))?; + path.truncate(c); + } + "allOf" | "anyOf" | "oneOf" => { + if let Some(arr) = value.as_array() { + let c1 = path.push_key(key); + for (i, item) in arr.iter().enumerate() { + let c2 = path.push_index(i); + f(path, item, draft.detect(item))?; + path.truncate(c2); + } + path.truncate(c1); + } + } + "$defs" | "definitions" | "dependentSchemas" | "patternProperties" | "properties" => { + if let Some(obj) = value.as_object() { + let c1 = path.push_key(key); + for (child_key, child_value) in obj { + let c2 = path.push_key(child_key); + f(path, child_value, draft.detect(child_value))?; + path.truncate(c2); + } + path.truncate(c1); + } + } + "items" => { + let c1 = path.push_key("items"); + match value { + Value::Array(arr) => { + for (i, item) in arr.iter().enumerate() { + let c2 = path.push_index(i); + f(path, item, draft.detect(item))?; + path.truncate(c2); + } + } + _ => f(path, value, draft.detect(value))?, + } + path.truncate(c1); + } + "dependencies" => { + if let Some(obj) = value.as_object() { + let c1 = path.push_key(key); + for (child_key, child_value) in obj { + if !child_value.is_object() { + continue; + } + let c2 = path.push_key(child_key); + f(path, child_value, draft.detect(child_value))?; + path.truncate(c2); + } + path.truncate(c1); + } + } + _ => {} + } + } + Ok(()) +} pub(crate) fn object_iter<'a>( (key, value): (&'a String, &'a Value), @@ -49,7 +134,7 @@ pub(crate) fn object_iter<'a>( pub(crate) fn maybe_in_subresource<'r>( segments: &Segments, resolver: &Resolver<'r>, - subresource: &InnerResourcePtr, + subresource: ResourceRef<'_>, ) -> Result, Error> { const IN_VALUE: &[&str] = &[ "additionalItems", @@ -75,18 +160,11 @@ pub(crate) fn maybe_in_subresource<'r>( "properties", ]; - let mut iter = segments.iter(); - while let Some(segment) = iter.next() { - if let Segment::Key(key) = segment { - if *key == "items" && subresource.contents().is_object() { - return 
resolver.in_subresource_inner(subresource); - } - if !IN_VALUE.contains(&key.as_ref()) - && (!IN_CHILD.contains(&key.as_ref()) || iter.next().is_none()) - { - return Ok(resolver.clone()); - } - } - } - resolver.in_subresource_inner(subresource) + subresources::maybe_in_subresource_with_items_and_dependencies( + segments, + resolver, + subresource, + IN_VALUE, + IN_CHILD, + ) } diff --git a/crates/jsonschema-referencing/src/specification/draft4.rs b/crates/jsonschema-referencing/src/specification/draft4.rs index 9082062c..6ff5e652 100644 --- a/crates/jsonschema-referencing/src/specification/draft4.rs +++ b/crates/jsonschema-referencing/src/specification/draft4.rs @@ -1,9 +1,97 @@ use serde_json::Value; -use crate::{resource::InnerResourcePtr, Error, Resolver, Segments}; +use crate::{resource::PathStack, specification::Draft, Error, Resolver, ResourceRef, Segments}; use super::subresources::{self, SubresourceIteratorInner}; +pub(crate) fn walk_subresources_with_path<'a, E, F>( + contents: &'a Value, + path: &mut PathStack<'a>, + draft: Draft, + f: &mut F, +) -> Result<(), E> +where + F: FnMut(&mut PathStack<'a>, &'a Value, Draft) -> Result<(), E>, +{ + let Some(schema) = contents.as_object() else { + return Ok(()); + }; + for (key, value) in schema { + match key.as_str() { + "additionalItems" | "additionalProperties" if value.is_object() => { + let c = path.push_key(key); + f(path, value, draft.detect(value))?; + path.truncate(c); + } + "contains" + | "contentSchema" + | "else" + | "if" + | "not" + | "propertyNames" + | "then" + | "unevaluatedItems" + | "unevaluatedProperties" => { + let c = path.push_key(key); + f(path, value, draft.detect(value))?; + path.truncate(c); + } + "allOf" | "anyOf" | "oneOf" | "prefixItems" => { + if let Some(arr) = value.as_array() { + let c1 = path.push_key(key); + for (i, item) in arr.iter().enumerate() { + let c2 = path.push_index(i); + f(path, item, draft.detect(item))?; + path.truncate(c2); + } + path.truncate(c1); + } + } + "$defs" 
| "definitions" | "dependentSchemas" | "patternProperties" | "properties" => { + if let Some(obj) = value.as_object() { + let c1 = path.push_key(key); + for (child_key, child_value) in obj { + let c2 = path.push_key(child_key); + f(path, child_value, draft.detect(child_value))?; + path.truncate(c2); + } + path.truncate(c1); + } + } + "items" => { + let c1 = path.push_key(key); + match value { + Value::Array(arr) => { + for (i, item) in arr.iter().enumerate() { + let c2 = path.push_index(i); + f(path, item, draft.detect(item))?; + path.truncate(c2); + } + } + _ => f(path, value, draft.detect(value))?, + } + path.truncate(c1); + } + "dependencies" => { + if let Some(obj) = value.as_object() { + let c1 = path.push_key(key); + for (child_key, child_value) in obj { + if !child_value.is_object() { + continue; + } + let c2 = path.push_key(child_key); + f(path, child_value, draft.detect(child_value))?; + path.truncate(c2); + } + path.truncate(c1); + } + } + _ => {} + } + } + Ok(()) +} + pub(crate) fn object_iter<'a>( (key, value): (&'a String, &'a Value), ) -> SubresourceIteratorInner<'a> { @@ -59,7 +147,7 @@ pub(crate) fn object_iter<'a>( pub(crate) fn maybe_in_subresource<'r>( segments: &Segments, resolver: &Resolver<'r>, - subresource: &InnerResourcePtr, + subresource: ResourceRef<'_>, ) -> Result, Error> { const IN_VALUE: &[&str] = &["additionalItems", "additionalProperties", "not"]; const IN_CHILD: &[&str] = &[ diff --git a/crates/jsonschema-referencing/src/specification/draft6.rs b/crates/jsonschema-referencing/src/specification/draft6.rs index 81ff8218..82ed7f7b 100644 --- a/crates/jsonschema-referencing/src/specification/draft6.rs +++ b/crates/jsonschema-referencing/src/specification/draft6.rs @@ -1,9 +1,84 @@ use serde_json::Value; -use crate::{resource::InnerResourcePtr, Error, Resolver, Segments}; +use crate::{resource::PathStack, specification::Draft, Error, Resolver, ResourceRef, Segments}; use super::subresources::{self, SubresourceIteratorInner}; +pub(crate) 
fn walk_subresources_with_path<'a, E, F>( + contents: &'a Value, + path: &mut PathStack<'a>, + draft: Draft, + f: &mut F, +) -> Result<(), E> +where + F: FnMut(&mut PathStack<'a>, &'a Value, Draft) -> Result<(), E>, +{ + let Some(schema) = contents.as_object() else { + return Ok(()); + }; + for (key, value) in schema { + match key.as_str() { + "additionalItems" | "additionalProperties" | "contains" | "not" | "propertyNames" => { + let c = path.push_key(key); + f(path, value, draft.detect(value))?; + path.truncate(c); + } + "allOf" | "anyOf" | "oneOf" => { + if let Some(arr) = value.as_array() { + let c1 = path.push_key(key); + for (i, item) in arr.iter().enumerate() { + let c2 = path.push_index(i); + f(path, item, draft.detect(item))?; + path.truncate(c2); + } + path.truncate(c1); + } + } + "definitions" | "patternProperties" | "properties" => { + if let Some(obj) = value.as_object() { + let c1 = path.push_key(key); + for (child_key, child_value) in obj { + let c2 = path.push_key(child_key); + f(path, child_value, draft.detect(child_value))?; + path.truncate(c2); + } + path.truncate(c1); + } + } + "items" => { + let c1 = path.push_key("items"); + match value { + Value::Array(arr) => { + for (i, item) in arr.iter().enumerate() { + let c2 = path.push_index(i); + f(path, item, draft.detect(item))?; + path.truncate(c2); + } + } + _ => f(path, value, draft.detect(value))?, + } + path.truncate(c1); + } + "dependencies" => { + if let Some(obj) = value.as_object() { + let c1 = path.push_key(key); + for (child_key, child_value) in obj { + if !child_value.is_object() { + continue; + } + let c2 = path.push_key(child_key); + f(path, child_value, draft.detect(child_value))?; + path.truncate(c2); + } + path.truncate(c1); + } + } + _ => {} + } + } + Ok(()) +} + pub(crate) fn object_iter<'a>( (key, value): (&'a String, &'a Value), ) -> SubresourceIteratorInner<'a> { @@ -43,7 +118,7 @@ pub(crate) fn object_iter<'a>( pub(crate) fn maybe_in_subresource<'r>( segments: &Segments, 
resolver: &Resolver<'r>, - subresource: &InnerResourcePtr, + subresource: ResourceRef<'_>, ) -> Result, Error> { const IN_VALUE: &[&str] = &[ "additionalItems", diff --git a/crates/jsonschema-referencing/src/specification/draft7.rs b/crates/jsonschema-referencing/src/specification/draft7.rs index c61af4f4..ca4cde92 100644 --- a/crates/jsonschema-referencing/src/specification/draft7.rs +++ b/crates/jsonschema-referencing/src/specification/draft7.rs @@ -1,9 +1,91 @@ use serde_json::Value; -use crate::{resource::InnerResourcePtr, Error, Resolver, Segments}; +use crate::{resource::PathStack, specification::Draft, Error, Resolver, ResourceRef, Segments}; use super::subresources::{self, SubresourceIteratorInner}; +pub(crate) fn walk_subresources_with_path<'a, E, F>( + contents: &'a Value, + path: &mut PathStack<'a>, + draft: Draft, + f: &mut F, +) -> Result<(), E> +where + F: FnMut(&mut PathStack<'a>, &'a Value, Draft) -> Result<(), E>, +{ + let Some(schema) = contents.as_object() else { + return Ok(()); + }; + for (key, value) in schema { + match key.as_str() { + "additionalItems" + | "additionalProperties" + | "contains" + | "else" + | "if" + | "not" + | "propertyNames" + | "then" => { + let c = path.push_key(key); + f(path, value, draft.detect(value))?; + path.truncate(c); + } + "allOf" | "anyOf" | "oneOf" => { + if let Some(arr) = value.as_array() { + let c1 = path.push_key(key); + for (i, item) in arr.iter().enumerate() { + let c2 = path.push_index(i); + f(path, item, draft.detect(item))?; + path.truncate(c2); + } + path.truncate(c1); + } + } + "definitions" | "patternProperties" | "properties" => { + if let Some(obj) = value.as_object() { + let c1 = path.push_key(key); + for (child_key, child_value) in obj { + let c2 = path.push_key(child_key); + f(path, child_value, draft.detect(child_value))?; + path.truncate(c2); + } + path.truncate(c1); + } + } + "items" => { + let c1 = path.push_key("items"); + match value { + Value::Array(arr) => { + for (i, item) in 
arr.iter().enumerate() { + let c2 = path.push_index(i); + f(path, item, draft.detect(item))?; + path.truncate(c2); + } + } + _ => f(path, value, draft.detect(value))?, + } + path.truncate(c1); + } + "dependencies" => { + if let Some(obj) = value.as_object() { + let c1 = path.push_key(key); + for (child_key, child_value) in obj { + if !child_value.is_object() { + continue; + } + let c2 = path.push_key(child_key); + f(path, child_value, draft.detect(child_value))?; + path.truncate(c2); + } + path.truncate(c1); + } + } + _ => {} + } + } + Ok(()) +} + pub(crate) fn object_iter<'a>( (key, value): (&'a String, &'a Value), ) -> SubresourceIteratorInner<'a> { @@ -57,7 +139,7 @@ pub(crate) fn object_iter<'a>( pub(crate) fn maybe_in_subresource<'r>( segments: &Segments, resolver: &Resolver<'r>, - subresource: &InnerResourcePtr, + subresource: ResourceRef<'_>, ) -> Result, Error> { const IN_VALUE: &[&str] = &[ "additionalItems", diff --git a/crates/jsonschema-referencing/src/specification/mod.rs b/crates/jsonschema-referencing/src/specification/mod.rs index 97e307c0..d04b35ce 100644 --- a/crates/jsonschema-referencing/src/specification/mod.rs +++ b/crates/jsonschema-referencing/src/specification/mod.rs @@ -10,7 +10,6 @@ mod subresources; use crate::{ anchors, - resource::InnerResourcePtr, vocabularies::{VocabularySet, DRAFT_2019_09_VOCABULARIES, DRAFT_2020_12_VOCABULARIES}, Anchor, Error, Resolver, Resource, ResourceRef, Segments, }; @@ -92,6 +91,62 @@ impl Draft { Draft::Draft201909 | Draft::Draft202012 | Draft::Unknown => ids::dollar_id(contents), } } + + /// Returns `(id, has_any_anchor)` when both pieces of information are needed + /// during registry preparation. 
+ #[inline] + pub(crate) fn id_and_has_anchors(self, contents: &Value) -> (Option<&str>, bool) { + let Some(obj) = contents.as_object() else { + return (None, false); + }; + match self { + Draft::Draft4 => { + if obj.len() <= 3 { + scan_legacy_id_small(obj) + } else { + let raw_id = obj.get("id").and_then(Value::as_str); + let is_anchor = raw_id.is_some_and(|id| id.starts_with('#')); + let plain_id = match (is_anchor, obj.contains_key("$ref")) { + (false, false) => raw_id, + _ => None, + }; + (plain_id, is_anchor) + } + } + Draft::Draft6 | Draft::Draft7 => { + if obj.len() <= 3 { + scan_legacy_dollar_id_small(obj) + } else { + let raw_id = obj.get("$id").and_then(Value::as_str); + let is_anchor = raw_id.is_some_and(|id| id.starts_with('#')); + let plain_id = match (is_anchor, obj.contains_key("$ref")) { + (false, false) => raw_id, + _ => None, + }; + (plain_id, is_anchor) + } + } + Draft::Draft201909 => { + if obj.len() <= 2 { + scan_id_and_anchor_small(obj) + } else { + let id = obj.get("$id").and_then(Value::as_str); + let has_anchor = obj.get("$anchor").and_then(Value::as_str).is_some(); + (id, has_anchor) + } + } + Draft::Draft202012 | Draft::Unknown => { + if obj.len() <= 3 { + scan_id_and_any_anchor_small(obj) + } else { + let id = obj.get("$id").and_then(Value::as_str); + let has_anchor = obj.get("$anchor").and_then(Value::as_str).is_some() + || obj.get("$dynamicAnchor").and_then(Value::as_str).is_some(); + (id, has_anchor) + } + } + } + } pub fn subresources_of(self, contents: &Value) -> impl Iterator { match contents.as_object() { Some(schema) => { @@ -107,7 +162,7 @@ impl Draft { None => SubresourceIterator::Empty, } } - pub(crate) fn anchors(self, contents: &Value) -> impl Iterator { + pub(crate) fn anchors(self, contents: &Value) -> impl Iterator> { match self { Draft::Draft4 => anchors::legacy_anchor_in_id(self, contents), Draft::Draft6 | Draft::Draft7 => anchors::legacy_anchor_in_dollar_id(self, contents), @@ -119,7 +174,7 @@ impl Draft { self, 
segments: &Segments, resolver: &Resolver<'r>, - subresource: &InnerResourcePtr, + subresource: ResourceRef<'_>, ) -> Result, Error> { match self { Draft::Draft4 => draft4::maybe_in_subresource(segments, resolver, subresource), @@ -133,6 +188,26 @@ impl Draft { } } } + pub(crate) fn walk_subresources_with_path<'a, E, F>( + self, + contents: &'a Value, + path: &mut crate::resource::PathStack<'a>, + f: &mut F, + ) -> Result<(), E> + where + F: FnMut(&mut crate::resource::PathStack<'a>, &'a Value, Draft) -> Result<(), E>, + { + match self { + Draft::Draft4 => draft4::walk_subresources_with_path(contents, path, self, f), + Draft::Draft6 => draft6::walk_subresources_with_path(contents, path, self, f), + Draft::Draft7 => draft7::walk_subresources_with_path(contents, path, self, f), + Draft::Draft201909 => draft201909::walk_subresources_with_path(contents, path, self, f), + Draft::Draft202012 | Draft::Unknown => { + subresources::walk_subresources_with_path(contents, path, self, f) + } + } + } + /// Identifies known JSON schema keywords per draft. 
#[must_use] pub fn is_known_keyword(&self, keyword: &str) -> bool { @@ -229,6 +304,76 @@ impl Draft { } } +fn scan_legacy_id_small(obj: &serde_json::Map) -> (Option<&str>, bool) { + let mut raw_id = None; + let mut has_ref = false; + + for (key, value) in obj { + match key.as_str() { + "id" => raw_id = value.as_str(), + "$ref" => has_ref = true, + _ => {} + } + } + + let is_anchor = raw_id.is_some_and(|id| id.starts_with('#')); + let plain_id = match (is_anchor, has_ref) { + (false, false) => raw_id, + _ => None, + }; + (plain_id, is_anchor) +} + +fn scan_legacy_dollar_id_small(obj: &serde_json::Map) -> (Option<&str>, bool) { + let mut raw_id = None; + let mut has_ref = false; + + for (key, value) in obj { + match key.as_str() { + "$id" => raw_id = value.as_str(), + "$ref" => has_ref = true, + _ => {} + } + } + + let is_anchor = raw_id.is_some_and(|id| id.starts_with('#')); + let plain_id = match (is_anchor, has_ref) { + (false, false) => raw_id, + _ => None, + }; + (plain_id, is_anchor) +} + +fn scan_id_and_anchor_small(obj: &serde_json::Map) -> (Option<&str>, bool) { + let mut id = None; + let mut has_anchor = false; + + for (key, value) in obj { + match key.as_str() { + "$id" => id = value.as_str(), + "$anchor" => has_anchor |= value.as_str().is_some(), + _ => {} + } + } + + (id, has_anchor) +} + +fn scan_id_and_any_anchor_small(obj: &serde_json::Map) -> (Option<&str>, bool) { + let mut id = None; + let mut has_anchor = false; + + for (key, value) in obj { + match key.as_str() { + "$id" => id = value.as_str(), + "$anchor" | "$dynamicAnchor" => has_anchor |= value.as_str().is_some(), + _ => {} + } + } + + (id, has_anchor) +} + #[cfg(test)] mod tests { use crate::Draft; diff --git a/crates/jsonschema-referencing/src/specification/subresources.rs b/crates/jsonschema-referencing/src/specification/subresources.rs index 9de4b3a5..7ee689b3 100644 --- a/crates/jsonschema-referencing/src/specification/subresources.rs +++ 
b/crates/jsonschema-referencing/src/specification/subresources.rs @@ -3,7 +3,71 @@ use std::iter::FlatMap; use serde_json::Value; -use crate::{resource::InnerResourcePtr, segments::Segment, Error, Resolver, Segments}; +use crate::{ + resource::PathStack, segments::Segment, specification::Draft, Error, Resolver, ResourceRef, + Segments, +}; + +/// Walk the direct subresources of `contents` (Draft 2020-12 / Unknown), +/// calling `f(path, &Value, Draft)` for each one. +/// `path` is the lazy JSON pointer to the current node; segments are pushed before each +/// call to `f` and popped afterward. +pub(crate) fn walk_subresources_with_path<'a, E, F>( + contents: &'a Value, + path: &mut PathStack<'a>, + draft: Draft, + f: &mut F, +) -> Result<(), E> +where + F: FnMut(&mut PathStack<'a>, &'a Value, Draft) -> Result<(), E>, +{ + let Some(schema) = contents.as_object() else { + return Ok(()); + }; + for (key, value) in schema { + match key.as_str() { + "additionalProperties" + | "contains" + | "contentSchema" + | "else" + | "if" + | "items" + | "not" + | "propertyNames" + | "then" + | "unevaluatedItems" + | "unevaluatedProperties" => { + let c = path.push_key(key); + f(path, value, draft.detect(value))?; + path.truncate(c); + } + "allOf" | "anyOf" | "oneOf" | "prefixItems" => { + if let Some(arr) = value.as_array() { + let c1 = path.push_key(key); + for (i, item) in arr.iter().enumerate() { + let c2 = path.push_index(i); + f(path, item, draft.detect(item))?; + path.truncate(c2); + } + path.truncate(c1); + } + } + "$defs" | "definitions" | "dependentSchemas" | "patternProperties" | "properties" => { + if let Some(obj) = value.as_object() { + let c1 = path.push_key(key); + for (child_key, child_value) in obj { + let c2 = path.push_key(child_key); + f(path, child_value, draft.detect(child_value))?; + path.truncate(c2); + } + path.truncate(c1); + } + } + _ => {} + } + } + Ok(()) +} type ObjectIter<'a> = FlatMap< serde_json::map::Iter<'a>, @@ -101,7 +165,7 @@ pub(crate) fn 
object_iter<'a>( pub(crate) fn maybe_in_subresource<'r>( segments: &Segments, resolver: &Resolver<'r>, - subresource: &InnerResourcePtr, + subresource: ResourceRef<'_>, ) -> Result, Error> { const IN_VALUE: &[&str] = &[ "additionalProperties", @@ -138,14 +202,14 @@ pub(crate) fn maybe_in_subresource<'r>( } } } - resolver.in_subresource_inner(subresource) + resolver.in_subresource(subresource) } #[inline] pub(crate) fn maybe_in_subresource_with_items_and_dependencies<'r>( segments: &Segments, resolver: &Resolver<'r>, - subresource: &InnerResourcePtr, + subresource: ResourceRef<'_>, in_value: &[&str], in_child: &[&str], ) -> Result, Error> { @@ -153,7 +217,7 @@ pub(crate) fn maybe_in_subresource_with_items_and_dependencies<'r>( while let Some(segment) = iter.next() { if let Segment::Key(key) = segment { if (*key == "items" || *key == "dependencies") && subresource.contents().is_object() { - return resolver.in_subresource_inner(subresource); + return resolver.in_subresource(subresource); } if !in_value.contains(&key.as_ref()) && (!in_child.contains(&key.as_ref()) || iter.next().is_none()) @@ -162,7 +226,7 @@ pub(crate) fn maybe_in_subresource_with_items_and_dependencies<'r>( } } } - resolver.in_subresource_inner(subresource) + resolver.in_subresource(subresource) } #[cfg(test)] diff --git a/crates/jsonschema-referencing/tests/suite.rs b/crates/jsonschema-referencing/tests/suite.rs index 967392a1..4f322978 100644 --- a/crates/jsonschema-referencing/tests/suite.rs +++ b/crates/jsonschema-referencing/tests/suite.rs @@ -43,15 +43,16 @@ fn test_suite(draft: &'static str, test: Test) { "json-schema-draft-2020-12" => Draft::Draft202012, _ => panic!("Unknown draft"), }; - let registry = Registry::try_from_resources( - test.registry - .into_iter() - .map(|(uri, content)| (uri, draft.create_resource(content))), - ) - .expect("Invalid registry"); - let resolver = registry - .try_resolver(test.base_uri.unwrap_or_default()) - .expect("Invalid base URI"); + let mut registry = 
Registry::new().draft(draft); + for (uri, content) in test.registry { + registry = registry + .add(uri, draft.create_resource(content)) + .expect("Invalid registry input"); + } + let registry = registry.prepare().expect("Invalid registry"); + let resolver = registry.resolver( + referencing::uri::from_str(test.base_uri.unwrap_or_default()).expect("Invalid base URI"), + ); if test.error.is_some() { assert!(resolver.lookup(test.reference).is_err()); } else { diff --git a/crates/jsonschema/src/bundler.rs b/crates/jsonschema/src/bundler.rs index 3d21e1dc..357e4057 100644 --- a/crates/jsonschema/src/bundler.rs +++ b/crates/jsonschema/src/bundler.rs @@ -2,11 +2,10 @@ use crate::{compiler, options::ValidationOptions}; use ahash::AHashSet; use referencing::{Draft, Resolver}; use serde_json::{Map, Value}; - fn bundle_from_registry( schema: &Value, draft: Draft, - registry: &referencing::Registry, + registry: &referencing::Registry<'_>, base_uri: &referencing::Uri, ) -> Result { let resolver = registry.resolver(base_uri.clone()); @@ -19,27 +18,50 @@ fn bundle_from_registry( } pub(crate) fn bundle_with_options( - config: &ValidationOptions, + config: &ValidationOptions<'_>, schema: &Value, ) -> Result { let draft = config.draft_for(schema)?; - let resource = draft.create_resource(schema.clone()); let resource_ref = draft.create_resource_ref(schema); + if let Some(registry) = config.registry { + let requested_base_uri = + compiler::resolve_base_uri(config.base_uri.as_ref(), resource_ref.id())?; + let overlay = registry + .add(requested_base_uri.as_str(), resource_ref)? 
+ .retriever(config.retriever.clone()) + .draft(draft) + .prepare()?; + let base_uri = + compiler::normalized_base_uri_for_generated_registry(&overlay, &requested_base_uri); + return bundle_from_registry(schema, draft, &overlay, &base_uri); + } let (registry, base_uri) = - compiler::build_registry(config, draft, resource, resource_ref.id())?; + compiler::build_registry(config, draft, resource_ref, resource_ref.id())?; bundle_from_registry(schema, draft, &registry, &base_uri) } #[cfg(feature = "resolve-async")] pub(crate) async fn bundle_with_options_async( - config: &crate::options::ValidationOptions>, + config: &crate::options::ValidationOptions<'_, std::sync::Arc>, schema: &Value, ) -> Result { let draft = config.draft_for(schema).await?; - let resource = draft.create_resource(schema.clone()); let resource_ref = draft.create_resource_ref(schema); + if let Some(registry) = config.registry { + let requested_base_uri = + compiler::resolve_base_uri(config.base_uri.as_ref(), resource_ref.id())?; + let overlay = registry + .add(requested_base_uri.as_str(), resource_ref)? 
+ .async_retriever(config.retriever.clone()) + .draft(draft) + .async_prepare() + .await?; + let base_uri = + compiler::normalized_base_uri_for_generated_registry(&overlay, &requested_base_uri); + return bundle_from_registry(schema, draft, &overlay, &base_uri); + } let (registry, base_uri) = - compiler::build_registry_async(config, draft, resource, resource_ref.id()).await?; + compiler::build_registry_async(config, draft, resource_ref, resource_ref.id()).await?; bundle_from_registry(schema, draft, &registry, &base_uri) } diff --git a/crates/jsonschema/src/compiler.rs b/crates/jsonschema/src/compiler.rs index 95a42ee0..8d7667cf 100644 --- a/crates/jsonschema/src/compiler.rs +++ b/crates/jsonschema/src/compiler.rs @@ -19,11 +19,10 @@ use crate::{ }; use ahash::{AHashMap, AHashSet}; use referencing::{ - uri, Draft, List, Registry, Resolved, Resolver, Resource, ResourceRef, Uri, Vocabulary, - VocabularySet, + uri, Draft, List, Resolved, Resolver, ResourceRef, Uri, Vocabulary, VocabularySet, }; use serde_json::{Map, Value}; -use std::{borrow::Cow, cell::RefCell, iter::once, rc::Rc, sync::Arc}; +use std::{cell::RefCell, rc::Rc, sync::Arc}; const DEFAULT_SCHEME: &str = "json-schema"; pub(crate) const DEFAULT_BASE_URI: &str = "json-schema:///"; @@ -33,6 +32,62 @@ type SharedCache = Rc>>; /// Type alias for shared sets in compiler state. 
type SharedSet = Rc>>; +pub(crate) trait CompilationOptions { + fn validate_formats(&self) -> Option; + fn are_unknown_formats_ignored(&self) -> bool; + fn get_content_media_type_check(&self, media_type: &str) -> Option; + fn content_encoding_check(&self, content_encoding: &str) -> Option; + fn get_content_encoding_convert( + &self, + content_encoding: &str, + ) -> Option; + fn get_keyword_factory(&self, name: &str) -> Option<&Arc>; + fn get_format(&self, format: &str) -> Option<(&String, &Arc)>; + fn pattern_options(&self) -> PatternEngineOptions; + fn email_options(&self) -> Option<&email_address::Options>; +} + +impl CompilationOptions for ValidationOptions<'_, R> { + fn validate_formats(&self) -> Option { + ValidationOptions::validate_formats(self) + } + + fn are_unknown_formats_ignored(&self) -> bool { + ValidationOptions::are_unknown_formats_ignored(self) + } + + fn get_content_media_type_check(&self, media_type: &str) -> Option { + ValidationOptions::get_content_media_type_check(self, media_type) + } + + fn content_encoding_check(&self, content_encoding: &str) -> Option { + ValidationOptions::content_encoding_check(self, content_encoding) + } + + fn get_content_encoding_convert( + &self, + content_encoding: &str, + ) -> Option { + ValidationOptions::get_content_encoding_convert(self, content_encoding) + } + + fn get_keyword_factory(&self, name: &str) -> Option<&Arc> { + ValidationOptions::get_keyword_factory(self, name) + } + + fn get_format(&self, format: &str) -> Option<(&String, &Arc)> { + ValidationOptions::get_format(self, format) + } + + fn pattern_options(&self) -> PatternEngineOptions { + ValidationOptions::compiler_pattern_options(self) + } + + fn email_options(&self) -> Option<&email_address::Options> { + ValidationOptions::compiler_email_options(self) + } +} + #[derive(Hash, PartialEq, Eq, Clone, Debug)] pub(crate) struct LocationCacheKey { pub(crate) base_uri: Arc>, @@ -116,10 +171,9 @@ impl SharedContextState { } /// Per-location view used while 
compiling schemas into validators. -#[derive(Debug, Clone)] +#[derive(Clone)] pub(crate) struct Context<'a> { - config: &'a ValidationOptions, - pub(crate) registry: &'a Registry, + config: &'a dyn CompilationOptions, resolver: Resolver<'a>, vocabularies: VocabularySet, location: Location, @@ -145,8 +199,7 @@ pub(crate) struct Context<'a> { impl<'a> Context<'a> { pub(crate) fn new( - config: &'a ValidationOptions, - registry: &'a Registry, + config: &'a dyn CompilationOptions, resolver: Resolver<'a>, vocabularies: VocabularySet, draft: Draft, @@ -154,7 +207,6 @@ impl<'a> Context<'a> { ) -> Self { Context { config, - registry, resolver, resource_base: location.clone(), location, @@ -166,7 +218,7 @@ impl<'a> Context<'a> { pub(crate) fn draft(&self) -> Draft { self.draft } - pub(crate) fn config(&self) -> &ValidationOptions { + pub(crate) fn config(&self) -> &dyn CompilationOptions { self.config } @@ -178,7 +230,6 @@ impl<'a> Context<'a> { let resolver = self.resolver.in_subresource(resource)?; Ok(Context { config: self.config, - registry: self.registry, resolver, vocabularies: self.vocabularies.clone(), draft: resource.draft(), @@ -196,7 +247,6 @@ impl<'a> Context<'a> { let location = self.location.join(chunk); Context { config: self.config, - registry: self.registry, resolver: self.resolver.clone(), vocabularies: self.vocabularies.clone(), resource_base: self.resource_base.clone(), @@ -268,6 +318,9 @@ impl<'a> Context<'a> { pub(crate) fn supports_integer_valued_numbers(&self) -> bool { !matches!(self.draft, Draft::Draft4) } + pub(crate) fn find_vocabularies(&self, draft: Draft, contents: &Value) -> VocabularySet { + self.resolver.find_vocabularies(draft, contents) + } pub(crate) fn validates_formats_by_default(&self) -> bool { self.config.validate_formats().unwrap_or(matches!( self.draft, @@ -286,7 +339,6 @@ impl<'a> Context<'a> { ) -> Context<'a> { Context { config: self.config, - registry: self.registry, resolver, draft, vocabularies, @@ -668,142 +720,142 @@ 
impl<'a> Context<'a> { } } -pub(crate) fn build_registry( - config: &ValidationOptions, +pub(crate) fn build_registry<'a>( + config: &'a ValidationOptions<'a>, draft: Draft, - resource: referencing::Resource, - schema_id: Option<&str>, -) -> Result<(Arc, referencing::Uri), referencing::Error> { - let base_uri = if let Some(base_uri) = config.base_uri.as_ref() { - uri::from_str(base_uri)? - } else { - uri::from_str(schema_id.unwrap_or(DEFAULT_BASE_URI))? - }; - - // Build a registry & resolver needed for validator compilation - // Clone resources to drain them without mutating the original config - let pairs = collect_resource_pairs(base_uri.as_str(), resource, config.resources.clone()); - - let registry = if let Some(ref registry) = config.registry { - Arc::new(registry.clone().try_with_resources_and_retriever( - pairs, - &*config.retriever, - draft, - )?) - } else { - Arc::new( - Registry::options() - .draft(draft) - .retriever(Arc::clone(&config.retriever)) - .build(pairs)?, - ) - }; + resource: ResourceRef<'a>, + schema_id: Option<&'a str>, +) -> Result<(referencing::Registry<'a>, referencing::Uri), referencing::Error> { + let base_uri = resolve_base_uri(config.base_uri.as_ref(), schema_id)?; + let registry = referencing::Registry::new() + .retriever(config.retriever.clone()) + .draft(draft) + .add(base_uri.as_str(), resource)? 
+ .prepare()?; Ok((registry, base_uri)) } pub(crate) fn build_validator( - config: &ValidationOptions, + config: &ValidationOptions<'_>, schema: &Value, ) -> Result> { let draft = config.draft_for(schema)?; - let resource_ref = draft.create_resource_ref(schema); // single computation - let resource = draft.create_resource(schema.clone()); - let (registry, base_uri) = build_registry(config, draft, resource, resource_ref.id())?; - let vocabularies = registry.find_vocabularies(draft, schema); - let resolver = registry.resolver(base_uri); - - let ctx = Context::new( - config, - &registry, - resolver, - vocabularies, - draft, - Location::new(), - ); + let resource = draft.create_resource_ref(schema); // Validate the schema itself if config.validate_schema { validate_schema(draft, schema)?; } - // Finally, compile the validator - let root = compile(&ctx, resource_ref).map_err(ValidationError::to_owned)?; - let draft = config.draft(); - Ok(Validator { root, draft }) + if let Some(registry) = config.registry { + let base_uri = resolve_base_uri(config.base_uri.as_ref(), resource.id())?; + let overlay = registry + .add(base_uri.as_str(), resource)? + .retriever(config.retriever.clone()) + .draft(draft) + .prepare()?; + return build_validator_with_registry(config, schema, draft, resource, &overlay); + } + + let (registry, _) = build_registry(config, draft, resource, resource.id())?; + build_validator_with_registry(config, schema, draft, resource, &registry) } #[cfg(feature = "resolve-async")] -pub(crate) async fn build_registry_async( - config: &ValidationOptions>, +pub(crate) async fn build_registry_async<'a>( + config: &'a ValidationOptions<'a, Arc>, draft: Draft, - resource: referencing::Resource, - schema_id: Option<&str>, -) -> Result<(Arc, referencing::Uri), referencing::Error> { - let base_uri = if let Some(base_uri) = config.base_uri.as_ref() { - uri::from_str(base_uri)? - } else { - uri::from_str(schema_id.unwrap_or(DEFAULT_BASE_URI))? 
- }; - - // Build a registry & resolver needed for validator compilation - // Clone resources to drain them without mutating the original config - let pairs = collect_resource_pairs(base_uri.as_str(), resource, config.resources.clone()); - - let registry = if let Some(ref registry) = config.registry { - Arc::new( - registry - .clone() - .try_with_resources_and_retriever_async(pairs, &*config.retriever, draft) - .await?, - ) - } else { - Arc::new( - Registry::options() - .draft(draft) - .async_retriever(Arc::clone(&config.retriever)) - .build(pairs) - .await?, - ) - }; + resource: ResourceRef<'a>, + schema_id: Option<&'a str>, +) -> Result<(referencing::Registry<'a>, referencing::Uri), referencing::Error> { + let base_uri = resolve_base_uri(config.base_uri.as_ref(), schema_id)?; + let registry = referencing::Registry::new() + .async_retriever(config.retriever.clone()) + .draft(draft) + .add(base_uri.as_str(), resource)? + .async_prepare() + .await?; Ok((registry, base_uri)) } #[cfg(feature = "resolve-async")] pub(crate) async fn build_validator_async( - config: &ValidationOptions>, + config: &ValidationOptions<'_, Arc>, schema: &Value, ) -> Result> { let draft = config.draft_for(schema).await?; let resource_ref = draft.create_resource_ref(schema); // single computation - let resource = draft.create_resource(schema.clone()); - let (registry, base_uri) = - build_registry_async(config, draft, resource, resource_ref.id()).await?; - let vocabularies = registry.find_vocabularies(draft, schema); - let resolver = registry.resolver(base_uri); - // HACK: `ValidationOptions` struct has a default type parameter as `Arc` and to - // avoid propagating types everywhere in `Context`, it is easier to just replace the - // retriever to one that implements `Retrieve`, as it is not used anymore anyway. 
- let config_with_blocking_retriever = config - .clone() - .with_blocking_retriever(crate::retriever::DefaultRetriever); - let ctx = Context::new( - &config_with_blocking_retriever, - ®istry, - resolver, - vocabularies, - draft, - Location::new(), - ); if config.validate_schema { validate_schema(draft, schema)?; } - let root = compile(&ctx, resource_ref).map_err(ValidationError::to_owned)?; + if let Some(registry) = config.registry { + let base_uri = resolve_base_uri(config.base_uri.as_ref(), resource_ref.id())?; + let overlay = registry + .add(base_uri.as_str(), resource_ref)? + .async_retriever(config.retriever.clone()) + .draft(draft) + .async_prepare() + .await?; + return build_validator_with_registry(config, schema, draft, resource_ref, &overlay); + } + + let (registry, _) = + build_registry_async(config, draft, resource_ref, resource_ref.id()).await?; + build_validator_with_registry(config, schema, draft, resource_ref, ®istry) +} + +fn build_validator_with_registry( + config: &ValidationOptions<'_, R>, + schema: &Value, + draft: Draft, + resource: ResourceRef<'_>, + registry: &referencing::Registry<'_>, +) -> Result> { + let requested_base_uri = resolve_base_uri(config.base_uri.as_ref(), resource.id())?; + let base_uri = normalized_base_uri_for_generated_registry(registry, &requested_base_uri); + let vocabularies = registry.find_vocabularies(draft, schema); + let resolver = registry.resolver(base_uri); + let ctx = Context::new(config, resolver, vocabularies, draft, Location::new()); + let root = compile(&ctx, resource).map_err(ValidationError::to_owned)?; let draft = config.draft(); Ok(Validator { root, draft }) } +pub(crate) fn normalized_base_uri_for_generated_registry( + registry: &referencing::Registry<'_>, + base_uri: &referencing::Uri, +) -> referencing::Uri { + if registry.contains_resource_uri(base_uri.as_str()) { + return base_uri.clone(); + } + + if base_uri + .fragment() + .is_some_and(|fragment| fragment.as_str().is_empty()) + { + let mut 
normalized = base_uri.clone(); + normalized.set_fragment(None); + if registry.contains_resource_uri(normalized.as_str()) { + return normalized; + } + } + + panic!("generated registry is missing root URI '{base_uri}'"); +} + +pub(crate) fn resolve_base_uri( + base_uri: Option<&String>, + schema_id: Option<&str>, +) -> Result, referencing::Error> { + if let Some(base_uri) = base_uri { + uri::from_str(base_uri) + } else { + uri::from_str(schema_id.unwrap_or(DEFAULT_BASE_URI)) + } +} + fn annotations_to_value(annotations: AHashMap) -> Arc { let mut object = Map::with_capacity(annotations.len()); for (key, value) in annotations { @@ -812,18 +864,6 @@ fn annotations_to_value(annotations: AHashMap) -> Arc { Arc::new(Value::Object(object)) } -fn collect_resource_pairs( - base_uri: &str, - resource: Resource, - resources: AHashMap, -) -> impl IntoIterator, Resource)> { - once((Cow::Borrowed(base_uri), resource)).chain( - resources - .into_iter() - .map(|(uri, resource)| (Cow::Owned(uri), resource)), - ) -} - fn validate_schema(draft: Draft, schema: &Value) -> Result<(), ValidationError<'static>> { // Boolean schemas are always valid per the spec, skip validation if schema.is_boolean() { diff --git a/crates/jsonschema/src/error.rs b/crates/jsonschema/src/error.rs index 11873c57..409b32f2 100644 --- a/crates/jsonschema/src/error.rs +++ b/crates/jsonschema/src/error.rs @@ -1790,7 +1790,7 @@ impl fmt::Display for MaskedValidationError<'_, '_, '_> { #[cfg(test)] mod tests { use super::*; - use referencing::Resource; + use referencing::{Registry, Resource}; use serde_json::json; use test_case::test_case; @@ -2288,11 +2288,16 @@ mod tests { "$ref": "https://example.com/string.json" }); let instance = serde_json::json!(42); - let validator = crate::options() - .with_resource( + let registry = Registry::new() + .add( "https://example.com/string.json", Resource::from_contents(external), ) + .expect("external schema should be accepted") + .prepare() + .expect("registry should build"); 
+ let validator = crate::options() + .with_registry(®istry) .build(&schema) .expect("schema should compile"); let err = validator.validate(&instance).unwrap_err(); diff --git a/crates/jsonschema/src/keywords/ref_.rs b/crates/jsonschema/src/keywords/ref_.rs index 3e5d6df5..745e92d7 100644 --- a/crates/jsonschema/src/keywords/ref_.rs +++ b/crates/jsonschema/src/keywords/ref_.rs @@ -190,7 +190,7 @@ fn compile_reference_validator<'a>( Ok(resolved) => resolved.into_inner(), Err(error) => return Some(Err(ValidationError::from(error))), }; - let vocabularies = ctx.registry.find_vocabularies(draft, contents); + let vocabularies = ctx.find_vocabularies(draft, contents); let resource_ref = draft.create_resource_ref(contents); let inner_ctx = ctx.with_resolver_and_draft( resolver, @@ -241,7 +241,7 @@ fn compile_recursive_validator<'a>( .lookup_recursive_reference() .map_err(ValidationError::from)?; let (contents, resolver, draft) = resolved.into_inner(); - let vocabularies = ctx.registry.find_vocabularies(draft, contents); + let vocabularies = ctx.find_vocabularies(draft, contents); let resource_ref = draft.create_resource_ref(contents); let inner_ctx = ctx.with_resolver_and_draft( resolver, diff --git a/crates/jsonschema/src/keywords/unevaluated_items.rs b/crates/jsonschema/src/keywords/unevaluated_items.rs index 8cb45dbd..4eab1ae2 100644 --- a/crates/jsonschema/src/keywords/unevaluated_items.rs +++ b/crates/jsonschema/src/keywords/unevaluated_items.rs @@ -358,7 +358,7 @@ fn compile_recursive_ref<'a>( // Create context for the resolved reference and check its cache key let (contents, resolver, draft) = resolved.into_inner(); if let Value::Object(subschema) = &contents { - let vocabularies = ctx.registry.find_vocabularies(draft, contents); + let vocabularies = ctx.find_vocabularies(draft, contents); let ref_ctx = ctx.with_resolver_and_draft(resolver, draft, vocabularies, ctx.location().clone()); diff --git a/crates/jsonschema/src/keywords/unevaluated_properties.rs 
b/crates/jsonschema/src/keywords/unevaluated_properties.rs index 93cf7047..c086c65c 100644 --- a/crates/jsonschema/src/keywords/unevaluated_properties.rs +++ b/crates/jsonschema/src/keywords/unevaluated_properties.rs @@ -484,7 +484,7 @@ fn compile_ref<'a>( let (contents, resolver, draft) = resolved.into_inner(); if let Value::Object(subschema) = &contents { - let vocabularies = ctx.registry.find_vocabularies(draft, contents); + let vocabularies = ctx.find_vocabularies(draft, contents); let ref_ctx = ctx.with_resolver_and_draft(resolver, draft, vocabularies, ctx.location().clone()); let validators = @@ -507,7 +507,7 @@ fn compile_dynamic_ref<'a>( let (contents, resolver, draft) = resolved.into_inner(); if let Value::Object(subschema) = &contents { - let vocabularies = ctx.registry.find_vocabularies(draft, contents); + let vocabularies = ctx.find_vocabularies(draft, contents); let ref_ctx = ctx.with_resolver_and_draft(resolver, draft, vocabularies, ctx.location().clone()); let validators = @@ -534,7 +534,7 @@ fn compile_recursive_ref<'a>( // Create context for the resolved reference and check its cache key let (contents, resolver, draft) = resolved.into_inner(); if let Value::Object(subschema) = &contents { - let vocabularies = ctx.registry.find_vocabularies(draft, contents); + let vocabularies = ctx.find_vocabularies(draft, contents); let ref_ctx = ctx.with_resolver_and_draft(resolver, draft, vocabularies, ctx.location().clone()); diff --git a/crates/jsonschema/src/lib.rs b/crates/jsonschema/src/lib.rs index 8e76b8cd..1e7d6f12 100644 --- a/crates/jsonschema/src/lib.rs +++ b/crates/jsonschema/src/lib.rs @@ -531,7 +531,7 @@ //! ```rust //! # fn main() -> Result<(), Box> { //! use serde_json::json; -//! use jsonschema::Resource; +//! use jsonschema::{Registry, Resource}; //! //! // Root schema with multiple definitions //! let root_schema = json!({ @@ -559,9 +559,13 @@ //! // Create a schema that references the specific definition you want to validate against //! 
let user_schema = json!({"$ref": "https://example.com/root#/definitions/User"}); //! -//! // Register the root schema and build validator for the specific definition +//! let registry = Registry::new() +//! .add("https://example.com/root", root_schema)? +//! .prepare()?; +//! +//! // Build validator for the specific definition via the shared prepared registry //! let validator = jsonschema::options() -//! .with_resource("https://example.com/root", Resource::from_contents(root_schema)) +//! .with_registry(®istry) //! .build(&user_schema)?; //! //! // Now validate data against just the User definition @@ -887,7 +891,7 @@ pub use http::HttpOptions; pub use keywords::custom::Keyword; pub use options::{EmailOptions, FancyRegex, PatternOptions, Regex, ValidationOptions}; pub use referencing::{ - Draft, Error as ReferencingError, Registry, RegistryOptions, Resource, Retrieve, Uri, + Draft, Error as ReferencingError, Registry, RegistryBuilder, Resource, Retrieve, Uri, }; #[cfg(all(feature = "resolve-http", not(target_arch = "wasm32")))] pub use retriever::{HttpRetriever, HttpRetrieverError}; @@ -1165,7 +1169,7 @@ pub async fn async_validator_for(schema: &Value) -> Result ValidationOptions { +pub fn options<'i>() -> ValidationOptions<'i> { Validator::options() } @@ -1231,15 +1235,15 @@ pub fn options() -> ValidationOptions { /// See [`ValidationOptions`] for all available configuration options. #[cfg(feature = "resolve-async")] #[must_use] -pub fn async_options() -> ValidationOptions> { +pub fn async_options<'i>() -> ValidationOptions<'i, std::sync::Arc> { Validator::async_options() } /// Functionality for validating JSON Schema documents against their meta-schemas. 
pub mod meta { - use crate::{error::ValidationError, Draft}; + use crate::{error::ValidationError, Draft, Registry}; use ahash::AHashSet; - use referencing::{Registry, Retrieve}; + use referencing::Retrieve; use serde_json::Value; pub use validator_handle::MetaValidator; @@ -1257,10 +1261,11 @@ pub mod meta { /// "type": "object" /// })); /// - /// let registry = Registry::try_new( - /// "http://example.com/meta", - /// custom_meta - /// ).unwrap(); + /// let registry = Registry::new() + /// .add("http://example.com/meta", custom_meta) + /// .unwrap() + /// .prepare() + /// .unwrap(); /// /// let schema = json!({ /// "$schema": "http://example.com/meta", @@ -1268,21 +1273,21 @@ pub mod meta { /// }); /// /// assert!(jsonschema::meta::options() - /// .with_registry(registry) + /// .with_registry(®istry) /// .is_valid(&schema)); /// ``` #[must_use] - pub fn options() -> MetaSchemaOptions { + pub fn options<'a>() -> MetaSchemaOptions<'a> { MetaSchemaOptions::default() } /// Options for meta-schema validation. #[derive(Clone, Default)] - pub struct MetaSchemaOptions { - registry: Option, + pub struct MetaSchemaOptions<'a> { + registry: Option<&'a Registry<'a>>, } - impl MetaSchemaOptions { + impl<'a> MetaSchemaOptions<'a> { /// Use a registry for resolving custom meta-schemas. 
/// /// # Examples @@ -1296,16 +1301,17 @@ pub mod meta { /// "type": "object" /// })); /// - /// let registry = Registry::try_new( - /// "http://example.com/meta", - /// custom_meta - /// ).unwrap(); + /// let registry = Registry::new() + /// .add("http://example.com/meta", custom_meta) + /// .unwrap() + /// .prepare() + /// .unwrap(); /// /// let options = jsonschema::meta::options() - /// .with_registry(registry); + /// .with_registry(®istry); /// ``` #[must_use] - pub fn with_registry(mut self, registry: Registry) -> Self { + pub fn with_registry(mut self, registry: &'a Registry<'a>) -> Self { self.registry = Some(registry); self } @@ -1317,7 +1323,7 @@ pub mod meta { /// Panics if the meta-schema cannot be resolved. #[must_use] pub fn is_valid(&self, schema: &Value) -> bool { - match try_meta_validator_for(schema, self.registry.as_ref()) { + match try_meta_validator_for(schema, self.registry) { Ok(validator) => validator.as_ref().is_valid(schema), Err(e) => panic!("Failed to resolve meta-schema: {e}"), } @@ -1328,8 +1334,11 @@ pub mod meta { /// # Errors /// /// Returns [`ValidationError`] if the schema is invalid or if the meta-schema cannot be resolved. 
- pub fn validate<'a>(&self, schema: &'a Value) -> Result<(), ValidationError<'a>> { - let validator = try_meta_validator_for(schema, self.registry.as_ref())?; + pub fn validate<'schema>( + &self, + schema: &'schema Value, + ) -> Result<(), ValidationError<'schema>> { + let validator = try_meta_validator_for(schema, self.registry)?; validator.as_ref().validate(schema) } } @@ -1576,7 +1585,7 @@ pub mod meta { fn try_meta_validator_for<'a>( schema: &Value, - registry: Option<&Registry>, + registry: Option<&'a Registry<'a>>, ) -> Result, ValidationError<'static>> { let draft = Draft::default().detect(schema); @@ -1593,7 +1602,8 @@ pub mod meta { resolve_meta_schema_with_registry(meta_schema_uri, registry)?; let validator = crate::options() .with_draft(resolved_draft) - .with_registry(registry.clone()) + .with_registry(registry) + .with_base_uri(meta_schema_uri.trim_end_matches('#')) .without_schema_validation() .build(&custom_meta_schema)?; return Ok(MetaValidator::owned(validator)); @@ -1615,14 +1625,14 @@ pub mod meta { fn resolve_meta_schema_with_registry( uri: &str, - registry: &Registry, + registry: &Registry<'_>, ) -> Result<(Value, Draft), ValidationError<'static>> { - let resolver = registry.try_resolver(uri)?; + let resolver = registry.resolver(referencing::uri::from_str(uri)?); let first_resolved = resolver.lookup("")?; let first_meta_schema = first_resolved.contents().clone(); let draft = walk_meta_schema_chain(uri, |current_uri| { - let resolver = registry.try_resolver(current_uri)?; + let resolver = registry.resolver(referencing::uri::from_str(current_uri)?); let resolved = resolver.lookup("")?; Ok(resolved.contents().clone()) })?; @@ -1790,7 +1800,7 @@ pub mod draft4 { /// /// See [`ValidationOptions`] for all available configuration options. 
#[must_use] - pub fn options() -> ValidationOptions { + pub fn options<'i>() -> ValidationOptions<'i> { crate::options().with_draft(Draft::Draft4) } @@ -1971,7 +1981,7 @@ pub mod draft6 { /// /// See [`ValidationOptions`] for all available configuration options. #[must_use] - pub fn options() -> ValidationOptions { + pub fn options<'i>() -> ValidationOptions<'i> { crate::options().with_draft(Draft::Draft6) } @@ -2152,7 +2162,7 @@ pub mod draft7 { /// /// See [`ValidationOptions`] for all available configuration options. #[must_use] - pub fn options() -> ValidationOptions { + pub fn options<'i>() -> ValidationOptions<'i> { crate::options().with_draft(Draft::Draft7) } @@ -2333,7 +2343,7 @@ pub mod draft201909 { /// /// See [`ValidationOptions`] for all available configuration options. #[must_use] - pub fn options() -> ValidationOptions { + pub fn options<'i>() -> ValidationOptions<'i> { crate::options().with_draft(Draft::Draft201909) } @@ -2516,7 +2526,7 @@ pub mod draft202012 { /// /// See [`ValidationOptions`] for all available configuration options. 
#[must_use] - pub fn options() -> ValidationOptions { + pub fn options<'i>() -> ValidationOptions<'i> { crate::options().with_draft(Draft::Draft202012) } @@ -2770,8 +2780,7 @@ pub(crate) mod tests_util { #[cfg(test)] mod tests { - use crate::{validator_for, ValidationError}; - use referencing::{Registry, Resource}; + use crate::{validator_for, Registry, ValidationError}; use super::Draft; use serde_json::json; @@ -3233,15 +3242,14 @@ mod tests { } }); - // Register the custom meta-schema as a resource - let resources = vec![( - "http://custom.example.com/schema".to_string(), - Resource::from_contents(meta_schema), - )]; - + let registry = Registry::new() + .add("http://custom.example.com/schema", meta_schema) + .expect("Should accept meta-schema") + .prepare() + .expect("Should create registry"); let validator = crate::options() .without_schema_validation() - .with_resources(resources.into_iter()) + .with_registry(®istry) .build(&schema) .expect("Should build validator"); @@ -3367,21 +3375,14 @@ mod tests { "type": "object" }); - // Build the validator with both the meta-schema and the element schema as resources - let resources = vec![ - ( - "http://example.com/meta/schema".to_string(), - referencing::Resource::from_contents(meta_schema), - ), - ( - "http://example.com/schemas/element".to_string(), - referencing::Resource::from_contents(element_schema.clone()), - ), - ]; - + let registry = Registry::new() + .add("http://example.com/meta/schema", meta_schema) + .expect("Should accept meta-schema") + .prepare() + .expect("Should create registry"); let validator = crate::options() .without_schema_validation() - .with_resources(resources.into_iter()) + .with_registry(®istry) .build(&element_schema) .expect("Should successfully build validator with custom meta-schema"); @@ -3410,13 +3411,14 @@ mod tests { } }); - let registry = Registry::try_new( - "http://example.com/custom-with-unevaluated", - Resource::from_contents(custom_meta), - ) - .expect("Should create 
registry"); + let registry = Registry::new() + .add("http://example.com/custom-with-unevaluated", custom_meta) + .expect("Should accept meta-schema") + .prepare() + .expect("Should create registry"); let schema = json!({ + "$id": "http://example.com/subject", "$schema": "http://example.com/custom-with-unevaluated#", "type": "object", "properties": { @@ -3427,7 +3429,7 @@ mod tests { let validator = crate::options() .without_schema_validation() - .with_registry(registry) + .with_registry(®istry) .build(&schema) .expect("Should build validator"); @@ -3448,11 +3450,11 @@ mod tests { "unevaluatedProperties": false }); - let registry = Registry::try_new( - "https://json-schema.org/draft/2020-12/strict", - Resource::from_contents(strict_meta), - ) - .expect("Should create registry"); + let registry = Registry::new() + .add("https://json-schema.org/draft/2020-12/strict", strict_meta) + .expect("Should accept strict meta-schema") + .prepare() + .expect("Should create registry"); // Valid schema - all keywords are recognized let valid_schema = json!({ @@ -3464,7 +3466,7 @@ mod tests { }); assert!(crate::meta::options() - .with_registry(registry.clone()) + .with_registry(®istry) .is_valid(&valid_schema)); // Invalid schema - top-level typo "typ" instead of "type" @@ -3474,7 +3476,7 @@ mod tests { }); assert!(!crate::meta::options() - .with_registry(registry.clone()) + .with_registry(®istry) .is_valid(&invalid_schema_top_level)); // Invalid schema - nested invalid keyword "minSize" (not a real JSON Schema keyword) @@ -3487,7 +3489,7 @@ mod tests { }); assert!(!crate::meta::options() - .with_registry(registry) + .with_registry(®istry) .is_valid(&invalid_schema_nested)); } @@ -3506,13 +3508,14 @@ mod tests { } }); - let registry = Registry::try_new( - "http://example.com/meta/draft7-custom", - Resource::from_contents(custom_meta_draft7), - ) - .expect("Should create registry"); + let registry = Registry::new() + .add("http://example.com/meta/draft7-custom", custom_meta_draft7) 
+ .expect("Should accept meta-schema") + .prepare() + .expect("Should create registry"); let schema = json!({ + "$id": "http://example.com/subject", "$schema": "http://example.com/meta/draft7-custom", "$ref": "#/$defs/positiveNumber", "maximum": 5, @@ -3526,7 +3529,7 @@ mod tests { let validator = crate::options() .without_schema_validation() - .with_registry(registry) + .with_registry(®istry) .build(&schema) .expect("Should build validator"); @@ -3537,11 +3540,11 @@ mod tests { mod meta_options_tests { use super::*; - use crate::{Registry, Resource}; + use crate::Registry; #[test] fn test_meta_options_with_registry_valid_schema() { - let custom_meta = Resource::from_contents(json!({ + let custom_meta = json!({ "$schema": "https://json-schema.org/draft/2020-12/schema", "type": "object", "properties": { @@ -3550,9 +3553,13 @@ mod tests { "maxLength": { "type": "integer" } }, "additionalProperties": false - })); + }); - let registry = Registry::try_new("http://example.com/meta", custom_meta).unwrap(); + let registry = Registry::new() + .add("http://example.com/meta", custom_meta) + .unwrap() + .prepare() + .unwrap(); let schema = json!({ "$schema": "http://example.com/meta", @@ -3561,27 +3568,31 @@ mod tests { }); assert!(crate::meta::options() - .with_registry(registry.clone()) + .with_registry(®istry) .is_valid(&schema)); assert!(crate::meta::options() - .with_registry(registry) + .with_registry(®istry) .validate(&schema) .is_ok()); } #[test] fn test_meta_options_with_registry_invalid_schema() { - let custom_meta = Resource::from_contents(json!({ + let custom_meta = json!({ "$schema": "https://json-schema.org/draft/2020-12/schema", "type": "object", "properties": { "type": { "type": "string" } }, "additionalProperties": false - })); + }); - let registry = Registry::try_new("http://example.com/meta", custom_meta).unwrap(); + let registry = Registry::new() + .add("http://example.com/meta", custom_meta) + .unwrap() + .prepare() + .unwrap(); // Schema has disallowed 
property let schema = json!({ @@ -3591,11 +3602,11 @@ mod tests { }); assert!(!crate::meta::options() - .with_registry(registry.clone()) + .with_registry(®istry) .is_valid(&schema)); assert!(crate::meta::options() - .with_registry(registry) + .with_registry(®istry) .validate(&schema) .is_err()); } @@ -3603,12 +3614,16 @@ mod tests { #[test] fn test_meta_options_with_registry_chain() { // Create a chain: custom-meta -> draft2020-12 - let custom_meta = Resource::from_contents(json!({ + let custom_meta = json!({ "$schema": "https://json-schema.org/draft/2020-12/schema", "type": "object" - })); + }); - let registry = Registry::try_new("http://example.com/custom", custom_meta).unwrap(); + let registry = Registry::new() + .add("http://example.com/custom", custom_meta) + .unwrap() + .prepare() + .unwrap(); let schema = json!({ "$schema": "http://example.com/custom", @@ -3616,34 +3631,37 @@ mod tests { }); assert!(crate::meta::options() - .with_registry(registry) + .with_registry(®istry) .is_valid(&schema)); } #[test] fn test_meta_options_with_registry_multi_level_chain() { // Create chain: schema -> meta-level-2 -> meta-level-1 -> draft2020-12 - let meta_level_1 = Resource::from_contents(json!({ + let meta_level_1 = json!({ "$id": "http://example.com/meta/level1", "$schema": "https://json-schema.org/draft/2020-12/schema", "type": "object", "properties": { "customProp": { "type": "boolean" } } - })); + }); - let meta_level_2 = Resource::from_contents(json!({ + let meta_level_2 = json!({ "$id": "http://example.com/meta/level2", "$schema": "http://example.com/meta/level1", "type": "object", "customProp": true - })); + }); - let registry = Registry::try_from_resources([ - ("http://example.com/meta/level1", meta_level_1), - ("http://example.com/meta/level2", meta_level_2), - ]) - .unwrap(); + let registry = Registry::new() + .extend([ + ("http://example.com/meta/level1", meta_level_1), + ("http://example.com/meta/level2", meta_level_2), + ]) + .unwrap() + .prepare() + 
.unwrap(); let schema = json!({ "$schema": "http://example.com/meta/level2", @@ -3652,22 +3670,22 @@ mod tests { }); assert!(crate::meta::options() - .with_registry(registry) + .with_registry(®istry) .is_valid(&schema)); } #[test] fn test_meta_options_with_registry_multi_document_meta_schema() { - let shared_constraints = Resource::from_contents(json!({ + let shared_constraints = json!({ "$id": "http://example.com/meta/shared", "$schema": "https://json-schema.org/draft/2020-12/schema", "type": "object", "properties": { "maxLength": { "type": "integer", "minimum": 0 } } - })); + }); - let root_meta = Resource::from_contents(json!({ + let root_meta = json!({ "$id": "http://example.com/meta/root", "$schema": "https://json-schema.org/draft/2020-12/schema", "type": "object", @@ -3678,13 +3696,16 @@ mod tests { "allOf": [ { "$ref": "http://example.com/meta/shared" } ] - })); + }); - let registry = Registry::try_from_resources([ - ("http://example.com/meta/root", root_meta), - ("http://example.com/meta/shared", shared_constraints), - ]) - .unwrap(); + let registry = Registry::new() + .extend([ + ("http://example.com/meta/root", root_meta), + ("http://example.com/meta/shared", shared_constraints), + ]) + .unwrap() + .prepare() + .unwrap(); let schema = json!({ "$schema": "http://example.com/meta/root", @@ -3693,7 +3714,7 @@ mod tests { }); let result = crate::meta::options() - .with_registry(registry.clone()) + .with_registry(®istry) .validate(&schema); assert!( @@ -3703,7 +3724,7 @@ mod tests { ); assert!(crate::meta::options() - .with_registry(registry) + .with_registry(®istry) .is_valid(&schema)); } @@ -3733,12 +3754,16 @@ mod tests { #[test] fn test_meta_options_with_registry_missing_metaschema() { - let custom_meta = Resource::from_contents(json!({ + let custom_meta = json!({ "$schema": "https://json-schema.org/draft/2020-12/schema", "type": "object" - })); + }); - let registry = Registry::try_new("http://example.com/meta1", custom_meta).unwrap(); + let registry = 
Registry::new() + .add("http://example.com/meta1", custom_meta) + .unwrap() + .prepare() + .unwrap(); // Schema references a different meta-schema not in registry let schema = json!({ @@ -3747,7 +3772,7 @@ mod tests { }); let result = crate::meta::options() - .with_registry(registry) + .with_registry(®istry) .validate(&schema); assert!(result.is_err()); @@ -3756,23 +3781,26 @@ mod tests { #[test] fn test_meta_options_circular_reference_detection() { // Create a circular reference: meta1 -> meta2 -> meta1 - let meta1 = Resource::from_contents(json!({ + let meta1 = json!({ "$id": "http://example.com/meta1", "$schema": "http://example.com/meta2", "type": "object" - })); + }); - let meta2 = Resource::from_contents(json!({ + let meta2 = json!({ "$id": "http://example.com/meta2", "$schema": "http://example.com/meta1", "type": "object" - })); + }); - let registry = Registry::try_from_resources([ - ("http://example.com/meta1", meta1), - ("http://example.com/meta2", meta2), - ]) - .unwrap(); + let registry = Registry::new() + .extend([ + ("http://example.com/meta1", meta1), + ("http://example.com/meta2", meta2), + ]) + .unwrap() + .prepare() + .unwrap(); let schema = json!({ "$schema": "http://example.com/meta1", @@ -3780,7 +3808,7 @@ mod tests { }); let result = crate::meta::options() - .with_registry(registry) + .with_registry(®istry) .validate(&schema); assert!(result.is_err()); @@ -3810,13 +3838,17 @@ mod tests { #[test] fn test_meta_options_validate_returns_specific_errors() { - let custom_meta = Resource::from_contents(json!({ + let custom_meta = json!({ "$schema": "https://json-schema.org/draft/2020-12/schema", "type": "object", "required": ["type"] - })); + }); - let registry = Registry::try_new("http://example.com/meta", custom_meta).unwrap(); + let registry = Registry::new() + .add("http://example.com/meta", custom_meta) + .unwrap() + .prepare() + .unwrap(); // Schema missing required property let schema = json!({ @@ -3827,7 +3859,7 @@ mod tests { }); let result = 
crate::meta::options() - .with_registry(registry) + .with_registry(®istry) .validate(&schema); assert!(result.is_err()); @@ -3837,7 +3869,7 @@ mod tests { #[test] fn test_meta_options_builds_validator_with_resolved_draft() { - let custom_meta = Resource::from_contents(json!({ + let custom_meta = json!({ "$id": "http://example.com/meta/draft7-based", "$schema": "http://json-schema.org/draft-07/schema#", "type": "object", @@ -3847,10 +3879,13 @@ mod tests { "minLength": { "type": "integer" } }, "additionalProperties": false - })); + }); - let registry = - Registry::try_new("http://example.com/meta/draft7-based", custom_meta).unwrap(); + let registry = Registry::new() + .add("http://example.com/meta/draft7-based", custom_meta) + .unwrap() + .prepare() + .unwrap(); let schema = json!({ "$schema": "http://example.com/meta/draft7-based", @@ -3859,7 +3894,7 @@ mod tests { }); let result = crate::meta::options() - .with_registry(registry) + .with_registry(®istry) .validate(&schema); assert!(result.is_ok()); @@ -3867,7 +3902,7 @@ mod tests { #[test] fn test_meta_options_validator_uses_correct_draft() { - let custom_meta_draft6 = Resource::from_contents(json!({ + let custom_meta_draft6 = json!({ "$id": "http://example.com/meta/draft6-based", "$schema": "http://json-schema.org/draft-06/schema#", "type": "object", @@ -3877,11 +3912,13 @@ mod tests { "exclusiveMinimum": { "type": "number" } }, "additionalProperties": false - })); + }); - let registry = - Registry::try_new("http://example.com/meta/draft6-based", custom_meta_draft6) - .unwrap(); + let registry = Registry::new() + .add("http://example.com/meta/draft6-based", custom_meta_draft6) + .unwrap() + .prepare() + .unwrap(); let schema_valid_for_draft6 = json!({ "$schema": "http://example.com/meta/draft6-based", @@ -3890,7 +3927,7 @@ mod tests { }); let result = crate::meta::options() - .with_registry(registry) + .with_registry(®istry) .validate(&schema_valid_for_draft6); assert!(result.is_ok()); @@ -3898,7 +3935,7 @@ mod 
tests { #[test] fn test_meta_options_without_schema_validation_in_built_validator() { - let custom_meta = Resource::from_contents(json!({ + let custom_meta = json!({ "$id": "http://example.com/meta/custom", "$schema": "https://json-schema.org/draft/2020-12/schema", "type": "object", @@ -3907,10 +3944,13 @@ mod tests { "type": { "type": "string" } }, "additionalProperties": false - })); + }); - let registry = - Registry::try_new("http://example.com/meta/custom", custom_meta).unwrap(); + let registry = Registry::new() + .add("http://example.com/meta/custom", custom_meta) + .unwrap() + .prepare() + .unwrap(); let schema = json!({ "$schema": "http://example.com/meta/custom", @@ -3918,7 +3958,7 @@ mod tests { }); let result = crate::meta::options() - .with_registry(registry) + .with_registry(®istry) .validate(&schema); assert!(result.is_ok()); @@ -3928,7 +3968,7 @@ mod tests { fn test_meta_validation_uses_resolved_draft_from_chain() { // Chain: user-schema -> custom-meta -> Draft 4 // Validator should use Draft 4 rules to validate the schema - let custom_meta = Resource::from_contents(json!({ + let custom_meta = json!({ "$id": "http://example.com/meta/draft4-based", "$schema": "http://json-schema.org/draft-04/schema#", "type": "object", @@ -3939,10 +3979,13 @@ mod tests { "const": { "type": "string" } }, "additionalProperties": false - })); + }); - let registry = - Registry::try_new("http://example.com/meta/draft4-based", custom_meta).unwrap(); + let registry = Registry::new() + .add("http://example.com/meta/draft4-based", custom_meta) + .unwrap() + .prepare() + .unwrap(); let schema = json!({ "$schema": "http://example.com/meta/draft4-based", @@ -3951,7 +3994,7 @@ mod tests { }); let result = crate::meta::options() - .with_registry(registry) + .with_registry(®istry) .validate(&schema); assert!(result.is_ok()); @@ -3960,16 +4003,16 @@ mod tests { #[test] fn test_meta_validation_multi_level_chain_uses_resolved_draft() { // Multi-level chain: user-schema -> meta-2 -> 
meta-1 -> Draft 4 - let meta_level_1 = Resource::from_contents(json!({ + let meta_level_1 = json!({ "$id": "http://example.com/meta/level1", "$schema": "http://json-schema.org/draft-04/schema#", "type": "object", "properties": { "customKeyword": { "type": "boolean" } } - })); + }); - let meta_level_2 = Resource::from_contents(json!({ + let meta_level_2 = json!({ "$id": "http://example.com/meta/level2", "$schema": "http://example.com/meta/level1", "type": "object", @@ -3981,13 +4024,16 @@ mod tests { }, "customKeyword": true, "additionalProperties": false - })); + }); - let registry = Registry::try_from_resources([ - ("http://example.com/meta/level1", meta_level_1), - ("http://example.com/meta/level2", meta_level_2), - ]) - .unwrap(); + let registry = Registry::new() + .extend([ + ("http://example.com/meta/level1", meta_level_1), + ("http://example.com/meta/level2", meta_level_2), + ]) + .unwrap() + .prepare() + .unwrap(); let schema = json!({ "$schema": "http://example.com/meta/level2", @@ -3997,7 +4043,7 @@ mod tests { }); let result = crate::meta::options() - .with_registry(registry) + .with_registry(®istry) .validate(&schema); assert!(result.is_ok()); @@ -4084,7 +4130,6 @@ mod tests { #[cfg(all(test, feature = "resolve-async", not(target_family = "wasm")))] mod async_tests { - use referencing::Resource; use std::{collections::HashMap, sync::Arc}; use serde_json::json; @@ -4231,36 +4276,19 @@ mod async_tests { } #[tokio::test] - async fn test_async_with_registry() { - use crate::Registry; - - // Create a registry with initial schemas - let registry = Registry::options() - .async_retriever(TestRetriever::new()) - .build([( - "https://example.com/user.json", - Resource::from_contents(json!({ - "type": "object", - "properties": { - "name": {"type": "string"}, - "age": {"type": "integer", "minimum": 0} - }, - "required": ["name"] - })), - )]) - .await - .unwrap(); + async fn test_async_with_registry_uses_async_retriever_for_inline_only_refs() { + let registry = 
crate::Registry::new().prepare().unwrap(); + let schema = json!({ + "$ref": "https://example.com/user.json" + }); - // Create a validator using the pre-populated registry let validator = crate::async_options() - .with_registry(registry) - .build(&json!({ - "$ref": "https://example.com/user.json" - })) + .with_registry(&registry) + .with_retriever(TestRetriever::new()) + .build(&schema) .await .unwrap(); - // Verify that validation works with the registry assert!(validator.is_valid(&json!({ "name": "John Doe", "age": 30 diff --git a/crates/jsonschema/src/options.rs b/crates/jsonschema/src/options.rs index 4975df79..903c8f67 100644 --- a/crates/jsonschema/src/options.rs +++ b/crates/jsonschema/src/options.rs @@ -12,13 +12,13 @@ use crate::{ }; use ahash::AHashMap; use email_address::Options as EmailAddressOptions; -use referencing::{Draft, Resource, Retrieve}; +use referencing::{Draft, Retrieve}; use serde_json::Value; use std::{fmt, marker::PhantomData, sync::Arc}; /// Configuration options for JSON Schema validation. #[derive(Clone)] -pub struct ValidationOptions> { +pub struct ValidationOptions<'i, R = Arc> { pub(crate) draft: Option, content_media_type_checks: AHashMap<&'static str, Option>, content_encoding_checks_and_converters: @@ -26,9 +26,7 @@ pub struct ValidationOptions> { pub(crate) base_uri: Option, /// Retriever for external resources pub(crate) retriever: R, - /// Additional resources that should be addressable during validation. 
- pub(crate) resources: AHashMap, - pub(crate) registry: Option, + pub(crate) registry: Option<&'i referencing::Registry<'i>>, formats: AHashMap>, validate_formats: Option, pub(crate) validate_schema: bool, @@ -38,7 +36,7 @@ pub struct ValidationOptions> { email_options: Option, } -impl Default for ValidationOptions> { +impl Default for ValidationOptions<'_, Arc> { fn default() -> Self { ValidationOptions { draft: None, @@ -46,7 +44,6 @@ impl Default for ValidationOptions> { content_encoding_checks_and_converters: AHashMap::default(), base_uri: None, retriever: Arc::new(DefaultRetriever), - resources: AHashMap::default(), registry: None, formats: AHashMap::default(), validate_formats: None, @@ -60,7 +57,7 @@ impl Default for ValidationOptions> { } #[cfg(feature = "resolve-async")] -impl Default for ValidationOptions> { +impl Default for ValidationOptions<'_, Arc> { fn default() -> Self { ValidationOptions { draft: None, @@ -68,7 +65,6 @@ impl Default for ValidationOptions> { content_encoding_checks_and_converters: AHashMap::default(), base_uri: None, retriever: Arc::new(DefaultRetriever), - resources: AHashMap::default(), registry: None, formats: AHashMap::default(), validate_formats: None, @@ -81,11 +77,34 @@ impl Default for ValidationOptions> { } } -impl ValidationOptions { +impl<'i, R> ValidationOptions<'i, R> { /// Return the draft version, or the default if not set. pub(crate) fn draft(&self) -> Draft { self.draft.unwrap_or_default() } + + pub(crate) fn compiler_pattern_options(&self) -> PatternEngineOptions { + self.pattern_options + } + + pub(crate) fn compiler_email_options(&self) -> Option<&EmailAddressOptions> { + self.email_options.as_ref() + } + + pub(crate) fn resolve_draft_from_registry( + uri: &str, + registry: &referencing::Registry<'_>, + ) -> Result { + let uri = uri.trim_end_matches('#'); + // Walk the meta-schema chain to find the underlying draft. 
+ crate::meta::walk_meta_schema_chain(uri, |current_uri| { + let uri = referencing::uri::from_str(current_uri)?; + let resolver = registry.resolver(uri); + let resolved = resolver.lookup("")?; + Ok(resolved.contents().clone()) + }) + } + /// Sets the JSON Schema draft version. /// /// ```rust @@ -274,99 +293,8 @@ impl ValidationOptions { self.base_uri = Some(base_uri.into()); self } - /// Add a custom schema, allowing it to be referenced by the specified URI during validation. - /// - /// This enables the use of additional in-memory schemas alongside the main schema being validated. - /// - /// # Example - /// - /// ```rust - /// # use serde_json::json; - /// # fn main() -> Result<(), Box> { - /// use jsonschema::Resource; - /// - /// let extra = Resource::from_contents(json!({"minimum": 5})); - /// - /// let validator = jsonschema::options() - /// .with_resource("urn:minimum-schema", extra) - /// .build(&json!({"$ref": "urn:minimum-schema"}))?; - /// assert!(validator.is_valid(&json!(5))); - /// assert!(!validator.is_valid(&json!(4))); - /// # Ok(()) - /// # } - /// ``` #[must_use] - pub fn with_resource(mut self, uri: impl Into, resource: Resource) -> Self { - self.resources.insert(uri.into(), resource); - self - } - /// Add custom schemas, allowing them to be referenced by the specified URI during validation. - /// - /// This enables the use of additional in-memory schemas alongside the main schema being validated. 
- /// - /// # Example - /// - /// ```rust - /// # use serde_json::json; - /// # fn main() -> Result<(), Box> { - /// use jsonschema::Resource; - /// - /// let validator = jsonschema::options() - /// .with_resources([ - /// ( - /// "urn:minimum-schema", - /// Resource::from_contents(json!({"minimum": 5})), - /// ), - /// ( - /// "urn:maximum-schema", - /// Resource::from_contents(json!({"maximum": 10})), - /// ), - /// ].into_iter()) - /// .build(&json!({"$ref": "urn:minimum-schema"}))?; - /// assert!(validator.is_valid(&json!(5))); - /// assert!(!validator.is_valid(&json!(4))); - /// # Ok(()) - /// # } - /// ``` - #[must_use] - pub fn with_resources( - mut self, - pairs: impl Iterator, Resource)>, - ) -> Self { - for (uri, resource) in pairs { - self.resources.insert(uri.into(), resource); - } - self - } - /// Use external schema resources from the registry, making them accessible via references - /// during validation. - /// - /// # Example - /// - /// ```rust - /// # use serde_json::json; - /// # fn main() -> Result<(), Box> { - /// use jsonschema::{Registry, Resource}; - /// - /// let registry = Registry::try_new( - /// "urn:name-schema", - /// Resource::from_contents(json!({"type": "string"})) - /// )?; - /// let schema = json!({ - /// "properties": { - /// "name": { "$ref": "urn:name-schema" } - /// } - /// }); - /// let validator = jsonschema::options() - /// .with_registry(registry) - /// .build(&schema)?; - /// assert!(validator.is_valid(&json!({ "name": "Valid String" }))); - /// assert!(!validator.is_valid(&json!({ "name": 123 }))); - /// # Ok(()) - /// # } - /// ``` - #[must_use] - pub fn with_registry(mut self, registry: referencing::Registry) -> Self { + pub fn with_registry(mut self, registry: &'i referencing::Registry<'i>) -> Self { self.registry = Some(registry); self } @@ -500,7 +428,7 @@ impl ValidationOptions { } } -impl ValidationOptions> { +impl ValidationOptions<'_, Arc> { /// Build a JSON Schema validator using the current options. 
/// /// If no draft is set via [`with_draft`](Self::with_draft), the draft is auto-detected @@ -572,15 +500,14 @@ impl ValidationOptions> { let default = Draft::default(); let detected = default.detect(contents); - // If detected draft is Unknown (custom meta-schema), try to resolve it + // If detected draft is Unknown (custom meta-schema), try to resolve it. if detected == Draft::Unknown { - if let Some(registry) = &self.registry { + if let Some(registry) = self.registry { if let Some(meta_schema_uri) = contents .as_object() .and_then(|obj| obj.get("$schema")) .and_then(|s| s.as_str()) { - // Walk the meta-schema chain to find the underlying draft return Self::resolve_draft_from_registry(meta_schema_uri, registry); } } @@ -590,17 +517,6 @@ impl ValidationOptions> { } } - fn resolve_draft_from_registry( - uri: &str, - registry: &referencing::Registry, - ) -> Result { - let uri = uri.trim_end_matches('#'); - crate::meta::walk_meta_schema_chain(uri, |current_uri| { - let resolver = registry.try_resolver(current_uri)?; - let resolved = resolver.lookup("")?; - Ok(resolved.contents().clone()) - }) - } /// Set a retriever to fetch external resources. #[must_use] pub fn with_retriever(mut self, retriever: impl Retrieve + 'static) -> Self { @@ -678,9 +594,6 @@ impl ValidationOptions> { self.pattern_options = options.inner; self } - pub(crate) fn pattern_options(&self) -> PatternEngineOptions { - self.pattern_options - } /// Set email validation options to customize email format validation behavior. /// @@ -702,14 +615,10 @@ impl ValidationOptions> { self.email_options = Some(options.inner); self } - - pub(crate) fn email_options(&self) -> Option<&EmailAddressOptions> { - self.email_options.as_ref() - } } #[cfg(feature = "resolve-async")] -impl ValidationOptions> { +impl<'i> ValidationOptions<'i, Arc> { /// Build a JSON Schema validator using the current async options. 
/// /// # Errors @@ -736,14 +645,13 @@ impl ValidationOptions> { pub fn with_retriever( self, retriever: impl referencing::AsyncRetrieve + 'static, - ) -> ValidationOptions> { + ) -> ValidationOptions<'i, Arc> { ValidationOptions { draft: self.draft, retriever: Arc::new(retriever), content_media_type_checks: self.content_media_type_checks, content_encoding_checks_and_converters: self.content_encoding_checks_and_converters, base_uri: None, - resources: self.resources, registry: self.registry, formats: self.formats, validate_formats: self.validate_formats, @@ -758,40 +666,31 @@ impl ValidationOptions> { pub(crate) async fn draft_for(&self, contents: &Value) -> Result { // Preference: // - Explicitly set - // - Autodetected + // - Autodetected (with registry resolution for custom meta-schemas) // - Default if let Some(draft) = self.draft { Ok(draft) } else { let default = Draft::default(); - Ok(default.detect(contents)) - } - } - /// Set a retriever to fetch external resources. - pub(crate) fn with_blocking_retriever( - self, - retriever: impl Retrieve + 'static, - ) -> ValidationOptions> { - ValidationOptions { - draft: self.draft, - retriever: Arc::new(retriever), - content_media_type_checks: self.content_media_type_checks, - content_encoding_checks_and_converters: self.content_encoding_checks_and_converters, - base_uri: None, - resources: self.resources, - registry: self.registry, - formats: self.formats, - validate_formats: self.validate_formats, - validate_schema: self.validate_schema, - ignore_unknown_formats: self.ignore_unknown_formats, - keywords: self.keywords, - pattern_options: self.pattern_options, - email_options: self.email_options, + let detected = default.detect(contents); + // If detected draft is Unknown (custom meta-schema), try to resolve it. 
+ if detected == Draft::Unknown { + if let Some(registry) = self.registry { + if let Some(meta_schema_uri) = contents + .as_object() + .and_then(|obj| obj.get("$schema")) + .and_then(|s| s.as_str()) + { + return Self::resolve_draft_from_registry(meta_schema_uri, registry); + } + } + } + Ok(detected) } } } -impl fmt::Debug for ValidationOptions { +impl fmt::Debug for ValidationOptions<'_, Arc> { fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { fmt.debug_struct("CompilationConfig") .field("draft", &self.draft) @@ -1160,8 +1059,8 @@ impl From for EmailOptions { #[cfg(test)] mod tests { use super::*; - use referencing::{Registry, Resource}; - use serde_json::json; + use referencing::{Registry, Resource, Retrieve, Uri}; + use serde_json::{json, Value}; fn custom(s: &str) -> bool { s.ends_with("42!") @@ -1181,24 +1080,53 @@ mod tests { #[test] fn with_registry() { - let registry = Registry::try_new( - "urn:name-schema", - Resource::from_contents(json!({"type": "string"})), - ) - .expect("Invalid URI"); + let registry = Registry::new() + .add("urn:name-schema", json!({"type": "string"})) + .expect("Invalid URI") + .prepare() + .expect("Registry should prepare"); let schema = json!({ "properties": { "name": { "$ref": "urn:name-schema" } } }); let validator = crate::options() - .with_registry(registry) + .with_registry(&registry) .build(&schema) .expect("Invalid schema"); assert!(validator.is_valid(&json!({ "name": "Valid String" }))); assert!(!validator.is_valid(&json!({ "name": 123 }))); } + struct InlineOnlyRetriever; + + impl Retrieve for InlineOnlyRetriever { + fn retrieve( + &self, + uri: &Uri, + ) -> Result> { + if uri.as_str() == "https://example.com/string.json" { + Ok(json!({"type": "string"})) + } else { + Err(format!("unexpected retrieval for {}", uri.as_str()).into()) + } + } + } + + #[test] + fn with_registry_uses_validation_options_retriever_for_inline_only_refs() { + let shared = Registry::new().prepare().expect("Registry should prepare"); + let 
schema = json!({"$ref": "https://example.com/string.json"}); + let validator = crate::options() + .with_registry(&shared) + .with_retriever(InlineOnlyRetriever) + .build(&schema) + .expect("Validator should build using the options retriever"); + + assert!(validator.is_valid(&json!("Valid String"))); + assert!(!validator.is_valid(&json!(123))); + } + #[test] fn test_fancy_regex_options_builder() { let options = PatternOptions::fancy_regex() @@ -1226,19 +1154,6 @@ mod tests { let _options = crate::options().with_draft(Draft::Unknown); } - #[test] - fn custom_meta_schema_allowed_when_draft_overridden() { - let schema = json!({ - "$schema": "json-schema:///custom/meta", - "type": "string" - }); - - crate::options() - .with_draft(Draft::Draft7) - .build(&schema) - .expect("Explicit draft override should bypass custom meta-schema registry checks"); - } - #[test] fn test_regex_options_builder() { let options = PatternOptions::regex() @@ -1285,12 +1200,16 @@ mod tests { // Create a schema that references the specific definition let user_schema = json!({"$ref": "https://example.com/root#/definitions/User"}); - // Build validator with the root schema registered as a resource - let validator = crate::options() - .with_resource( + let registry = Registry::new() + .add( "https://example.com/root", Resource::from_contents(root_schema), ) + .unwrap() + .prepare() + .unwrap(); + let validator = crate::options() + .with_registry(&registry) .build(&user_schema) .expect("Valid schema"); diff --git a/crates/jsonschema/src/paths.rs b/crates/jsonschema/src/paths.rs index b1a20b60..b2c0234a 100644 --- a/crates/jsonschema/src/paths.rs +++ b/crates/jsonschema/src/paths.rs @@ -5,7 +5,7 @@ use std::{ sync::{Arc, OnceLock}, }; -use referencing::unescape_segment; +use referencing::{unescape_segment, write_escaped_str, write_index}; use crate::keywords::Keyword; @@ -104,8 +104,12 @@ impl<'a> From<&'a LazyLocation<'_, '_>> for Location { return get_cached_index_paths()[*idx].clone(); } // Single 
index > 15: compute directly - let mut buf = itoa::Buffer::new(); - return Location(Arc::from(format!("/{}", buf.format(*idx)))); + let mut idx_buffer = itoa::Buffer::new(); + let idx = idx_buffer.format(*idx); + let mut buffer = String::with_capacity(1 + idx.len()); + buffer.push('/'); + buffer.push_str(idx); + return Location(Arc::from(buffer)); } } } @@ -156,8 +160,7 @@ impl<'a> From<&'a LazyLocation<'_, '_>> for Location { write_escaped_str(&mut buffer, property); } LocationSegment::Index(idx) => { - let mut itoa_buffer = itoa::Buffer::new(); - buffer.push_str(itoa_buffer.format(*idx)); + write_index(&mut buffer, *idx); } } } @@ -184,8 +187,7 @@ impl<'a> From<&'a LazyLocation<'_, '_>> for Location { write_escaped_str(&mut buffer, property); } LocationSegment::Index(idx) => { - let mut itoa_buffer = itoa::Buffer::new(); - buffer.push_str(itoa_buffer.format(*idx)); + write_index(&mut buffer, *idx); } } } @@ -622,9 +624,13 @@ impl Location { Self(Arc::from(buffer)) } LocationSegment::Index(idx) => { - let mut itoa_buf = itoa::Buffer::new(); - let segment = itoa_buf.format(idx); - Self(format!("{parent}/{segment}").into()) + let mut idx_buffer = itoa::Buffer::new(); + let idx = idx_buffer.format(idx); + let mut buffer = String::with_capacity(parent.len() + idx.len() + 1); + buffer.push_str(parent); + buffer.push('/'); + buffer.push_str(idx); + Self(Arc::from(buffer)) } } } @@ -651,44 +657,6 @@ impl Location { } } -pub fn write_escaped_str(buffer: &mut String, value: &str) { - match value.find(['~', '/']) { - Some(mut escape_idx) => { - let mut remaining = value; - - // Loop through the string to replace `~` and `/` - loop { - let (before, after) = remaining.split_at(escape_idx); - // Copy everything before the escape char - buffer.push_str(before); - - // Append the appropriate escape sequence - match after.as_bytes()[0] { - b'~' => buffer.push_str("~0"), - b'/' => buffer.push_str("~1"), - _ => unreachable!(), - } - - // Move past the escaped character - remaining 
= &after[1..]; - - // Find the next `~` or `/` to continue escaping - if let Some(next_escape_idx) = remaining.find(['~', '/']) { - escape_idx = next_escape_idx; - } else { - // Append any remaining part of the string - buffer.push_str(remaining); - break; - } - } - } - None => { - // If no escape characters are found, append the segment as is - buffer.push_str(value); - } - } -} - impl Default for Location { fn default() -> Self { Self::new() diff --git a/crates/jsonschema/src/retriever.rs b/crates/jsonschema/src/retriever.rs index 4b7698d1..7f6cec78 100644 --- a/crates/jsonschema/src/retriever.rs +++ b/crates/jsonschema/src/retriever.rs @@ -829,19 +829,11 @@ mod async_tests { } }); - // Create registry with default async retriever - let registry = Registry::options() - .async_retriever(DefaultRetriever) - .build([( - "http://example.com/schema", - crate::Draft::Draft202012.create_resource(schema.clone()), - )]) - .await - .expect("Registry creation failed"); - - let validator = crate::options() - .with_registry(registry) + let validator = crate::async_options() + .with_base_uri("http://example.com/schema") + .with_retriever(DefaultRetriever) .build(&schema) + .await .expect("Invalid schema"); let valid = json!({"user": {"name": "John Doe"}}); @@ -860,12 +852,11 @@ mod async_tests { } }); - let result = Registry::options() + let result = Registry::new() .async_retriever(DefaultRetriever) - .build([( - "http://example.com/schema", - crate::Draft::Draft202012.create_resource(schema), - )]) + .add("http://example.com/schema", schema) + .expect("Resource should be accepted") + .async_prepare() .await; assert!(result.is_err()); @@ -903,18 +894,11 @@ mod async_tests { } }); - let registry = Registry::options() - .async_retriever(DefaultRetriever) - .build([( - "http://example.com/schema", - crate::Draft::Draft202012.create_resource(schema.clone()), - )]) - .await - .expect("Registry creation failed"); - - let validator = crate::options() - .with_registry(registry) + let 
validator = crate::async_options() + .with_base_uri("http://example.com/schema") + .with_retriever(DefaultRetriever) .build(&schema) + .await .expect("Invalid schema"); let valid = json!({ diff --git a/crates/jsonschema/src/validator.rs b/crates/jsonschema/src/validator.rs index 7db3a940..a80ffd0c 100644 --- a/crates/jsonschema/src/validator.rs +++ b/crates/jsonschema/src/validator.rs @@ -289,7 +289,7 @@ impl Validator { /// .build(&schema); /// ``` #[must_use] - pub fn options() -> ValidationOptions { + pub fn options<'i>() -> ValidationOptions<'i> { ValidationOptions::default() } /// Create a default [`ValidationOptions`] configured for async validation. @@ -318,7 +318,8 @@ impl Validator { /// For sync validation, use [`options()`](crate::options()) instead. #[cfg(feature = "resolve-async")] #[must_use] - pub fn async_options() -> ValidationOptions> { + pub fn async_options<'i>( + ) -> ValidationOptions<'i, std::sync::Arc> { ValidationOptions::default() } /// Create a validator using the default options. 
diff --git a/crates/jsonschema/tests/bundle.rs b/crates/jsonschema/tests/bundle.rs index 38b549c3..cd619096 100644 --- a/crates/jsonschema/tests/bundle.rs +++ b/crates/jsonschema/tests/bundle.rs @@ -1,7 +1,38 @@ -use jsonschema::ReferencingError; -use referencing::Resource; +use jsonschema::{ReferencingError, Registry}; use serde_json::{json, Value}; +const TEST_ROOT_URI: &str = "urn:jsonschema:test:root"; + +fn registry_from_resources<'a>(resources: &'a [(&str, Value)]) -> Registry<'a> { + let mut registry = jsonschema::Registry::new(); + for (uri, schema) in resources { + registry = registry + .add(*uri, schema) + .expect("resource should be accepted"); + } + registry.prepare().expect("registry build failed") +} + +fn try_bundle_with_resources( + root: &Value, + resources: &[(&str, Value)], +) -> Result { + let registry = registry_from_resources(resources); + jsonschema::options() + .with_registry(&registry) + .with_base_uri(TEST_ROOT_URI) + .bundle(root) +} + +fn validator_with_resources(root: &Value, resources: &[(&str, Value)]) -> jsonschema::Validator { + let registry = registry_from_resources(resources); + jsonschema::options() + .with_registry(&registry) + .with_base_uri(TEST_ROOT_URI) + .build(root) + .expect("distributed compile failed") +} + #[cfg(all(feature = "resolve-async", not(target_arch = "wasm32")))] mod async_tests { use super::*; @@ -12,21 +43,21 @@ mod async_tests { "$schema": "https://json-schema.org/draft/2020-12/schema", "$ref": "https://example.com/person.json" }); + let registry = jsonschema::Registry::new() + .add("https://example.com/person.json", person_schema()) + .expect("resource should be accepted") + .prepare() + .expect("registry build failed"); let bundled = jsonschema::async_options() - .with_resource( - "https://example.com/person.json", - Resource::from_contents(person_schema()), - ) + .with_registry(&registry) + .with_base_uri(TEST_ROOT_URI) .bundle(&schema) .await .expect("async bundle failed"); - assert_eq!( - bundled.get("$ref"), 
- Some(&json!("https://example.com/person.json")) - ); - let defs = bundled.get("$defs").unwrap().as_object().unwrap(); - assert!(defs.contains_key("https://example.com/person.json")); + assert_eq!(bundled["$ref"], json!("https://example.com/person.json")); + let defs = bundled["$defs"].as_object().unwrap(); + assert!(!defs["https://example.com/person.json"].is_null()); } #[tokio::test] @@ -74,27 +105,19 @@ fn test_bundle_single_external_ref() { "$schema": "https://json-schema.org/draft/2020-12/schema", "$ref": "https://example.com/person.json" }); - let bundled = jsonschema::options() - .with_resource( - "https://example.com/person.json", - Resource::from_contents(person_schema()), - ) - .bundle(&schema) - .expect("bundle failed"); + let bundled = try_bundle_with_resources( + &schema, + &[("https://example.com/person.json", person_schema())], + ) + .expect("bundle failed"); // $ref MUST NOT be rewritten (spec requirement) - assert_eq!( - bundled.get("$ref"), - Some(&json!("https://example.com/person.json")) - ); - let defs = bundled.get("$defs").expect("no $defs").as_object().unwrap(); - assert!(defs.contains_key("https://example.com/person.json")); + assert_eq!(bundled["$ref"], json!("https://example.com/person.json")); + let defs = bundled["$defs"].as_object().unwrap(); + assert!(!defs["https://example.com/person.json"].is_null()); // embedded resource MUST have $id let embedded = &defs["https://example.com/person.json"]; - assert_eq!( - embedded.get("$id"), - Some(&json!("https://example.com/person.json")) - ); + assert_eq!(embedded["$id"], json!("https://example.com/person.json")); } #[test] @@ -103,13 +126,11 @@ fn test_bundle_validates_identically() { "$schema": "https://json-schema.org/draft/2020-12/schema", "$ref": "https://example.com/person.json" }); - let bundled = jsonschema::options() - .with_resource( - "https://example.com/person.json", - Resource::from_contents(person_schema()), - ) - .bundle(&schema) - .expect("bundle failed"); + let bundled = 
try_bundle_with_resources( + &schema, + &[("https://example.com/person.json", person_schema())], + ) + .expect("bundle failed"); let validator = jsonschema::validator_for(&bundled).expect("compile bundled failed"); assert!(validator.is_valid(&json!({"name": "Alice"}))); @@ -145,25 +166,22 @@ fn test_bundle_transitive_refs() { "$schema": "https://json-schema.org/draft/2020-12/schema", "$ref": "https://example.com/person.json" }); - let bundled = jsonschema::options() - .with_resource( - "https://example.com/person.json", - Resource::from_contents(person_with_address), - ) - .with_resource( - "https://example.com/address.json", - Resource::from_contents(address_schema), - ) - .bundle(&root) - .expect("bundle failed"); + let bundled = try_bundle_with_resources( + &root, + &[ + ("https://example.com/person.json", person_with_address), + ("https://example.com/address.json", address_schema), + ], + ) + .expect("bundle failed"); - let defs = bundled.get("$defs").unwrap().as_object().unwrap(); + let defs = bundled["$defs"].as_object().unwrap(); assert!( - defs.contains_key("https://example.com/person.json"), + !defs["https://example.com/person.json"].is_null(), "person missing" ); assert!( - defs.contains_key("https://example.com/address.json"), + !defs["https://example.com/address.json"].is_null(), "address missing" ); } @@ -181,17 +199,13 @@ fn test_bundle_circular_ref() { "$schema": "https://json-schema.org/draft/2020-12/schema", "$ref": "https://example.com/node.json" }); - let bundled = jsonschema::options() - .with_resource( - "https://example.com/node.json", - Resource::from_contents(node_schema), - ) - .bundle(&root) - .expect("bundle failed"); + let bundled = + try_bundle_with_resources(&root, &[("https://example.com/node.json", node_schema)]) + .expect("bundle failed"); - let defs = bundled.get("$defs").unwrap().as_object().unwrap(); + let defs = bundled["$defs"].as_object().unwrap(); assert_eq!(defs.len(), 1, "node.json should appear exactly once"); - 
assert!(defs.contains_key("https://example.com/node.json")); + assert!(!defs["https://example.com/node.json"].is_null()); } /// A `$ref` like `https://example.com/schema.json#/$defs/Name` should embed @@ -210,12 +224,7 @@ fn test_bundle_fragment_qualified_external_ref() { "name": { "$ref": "https://example.com/schema.json#/$defs/Name" } } }); - let bundled = jsonschema::options() - .with_resource( - "https://example.com/schema.json", - referencing::Resource::from_contents(schemas), - ) - .bundle(&root) + let bundled = try_bundle_with_resources(&root, &[("https://example.com/schema.json", schemas)]) .expect("bundle failed"); // $ref must NOT be rewritten @@ -225,8 +234,8 @@ fn test_bundle_fragment_qualified_external_ref() { json!("https://example.com/schema.json#/$defs/Name") ); // The whole schema.json document is embedded - let defs = bundled.get("$defs").expect("no $defs").as_object().unwrap(); - assert!(defs.contains_key("https://example.com/schema.json")); + let defs = bundled["$defs"].as_object().unwrap(); + assert!(!defs["https://example.com/schema.json"].is_null()); } /// An external schema that internally uses a relative $ref should have its @@ -251,25 +260,22 @@ fn test_bundle_relative_ref_inside_external_schema() { "$schema": "https://json-schema.org/draft/2020-12/schema", "$ref": "https://example.com/schemas/address.json" }); - let bundled = jsonschema::options() - .with_resource( - "https://example.com/schemas/address.json", - referencing::Resource::from_contents(address_schema), - ) - .with_resource( - "https://example.com/schemas/country.json", - referencing::Resource::from_contents(country_schema), - ) - .bundle(&root) - .expect("bundle failed"); + let bundled = try_bundle_with_resources( + &root, + &[ + ("https://example.com/schemas/address.json", address_schema), + ("https://example.com/schemas/country.json", country_schema), + ], + ) + .expect("bundle failed"); - let defs = bundled.get("$defs").expect("no $defs").as_object().unwrap(); + let defs = 
bundled["$defs"].as_object().unwrap(); assert!( - defs.contains_key("https://example.com/schemas/address.json"), + !defs["https://example.com/schemas/address.json"].is_null(), "address missing" ); assert!( - defs.contains_key("https://example.com/schemas/country.json"), + !defs["https://example.com/schemas/country.json"].is_null(), "country missing (transitive)" ); } @@ -281,17 +287,14 @@ fn test_bundle_inner_ref_not_rewritten() { let middle = json!({ "$id": "https://example.com/middle", "$ref": "https://example.com/leaf", "maximum": 100 }); let root = json!({ "$schema": "https://json-schema.org/draft/2020-12/schema", "$ref": "https://example.com/middle" }); - let bundled = jsonschema::options() - .with_resource( - "https://example.com/leaf", - referencing::Resource::from_contents(leaf), - ) - .with_resource( - "https://example.com/middle", - referencing::Resource::from_contents(middle), - ) - .bundle(&root) - .expect("bundle failed"); + let bundled = try_bundle_with_resources( + &root, + &[ + ("https://example.com/leaf", leaf), + ("https://example.com/middle", middle), + ], + ) + .expect("bundle failed"); assert_eq!( bundled["$ref"], @@ -321,18 +324,16 @@ fn test_bundle_resolves_ref_with_nested_id_scope() { } }); - let bundled = jsonschema::options() - .with_resource( - "https://example.com/A/b.json", - Resource::from_contents(nested_dependency), - ) - .bundle(&root) - .expect("bundle failed"); + let bundled = try_bundle_with_resources( + &root, + &[("https://example.com/A/b.json", nested_dependency)], + ) + .expect("bundle failed"); - let defs = bundled.get("$defs").expect("no $defs").as_object().unwrap(); - assert!(defs.contains_key("A")); + let defs = bundled["$defs"].as_object().unwrap(); + assert!(!defs["A"].is_null()); assert!( - defs.contains_key("https://example.com/A/b.json"), + !defs["https://example.com/A/b.json"].is_null(), "nested dependency was not embedded" ); } @@ -362,30 +363,27 @@ fn test_bundle_supports_legacy_drafts_using_definitions() { 
"$schema": schema_uri, "$ref": "https://example.com/person.json" }); - - let bundled = jsonschema::options() - .with_resource( + let bundled = try_bundle_with_resources( + &schema, + &[( "https://example.com/person.json", - Resource::from_contents(json!({ + json!({ "$id": "https://example.com/person.json", "$schema": schema_uri, "type": "object", "properties": { "name": { "type": "string" } } - })), - ) - .bundle(&schema) - .expect("bundle failed"); + }), + )], + ) + .expect("bundle failed"); assert!( bundled.get("$defs").is_none(), "unexpected $defs for {schema_uri}" ); - let definitions = bundled - .get("definitions") - .and_then(Value::as_object) - .expect("no definitions object"); + let definitions = bundled["definitions"].as_object().unwrap(); assert!( - definitions.contains_key("https://example.com/person.json"), + !definitions["https://example.com/person.json"].is_null(), "missing bundled resource for {schema_uri}" ); } @@ -397,23 +395,21 @@ fn test_bundle_draft4_embedded_resource_uses_id_keyword() { "$schema": "http://json-schema.org/draft-04/schema#", "$ref": "https://example.com/integer.json" }); - let bundled = jsonschema::options() - .with_resource( + let bundled = try_bundle_with_resources( + &root, + &[( "https://example.com/integer.json", - Resource::from_contents(json!({ + json!({ "$schema": "http://json-schema.org/draft-04/schema#", "type": "integer" - })), - ) - .bundle(&root) - .expect("bundle failed"); + }), + )], + ) + .expect("bundle failed"); let embedded = &bundled["definitions"]["https://example.com/integer.json"]; - assert_eq!( - embedded.get("id"), - Some(&json!("https://example.com/integer.json")) - ); - assert!(embedded.get("$id").is_none()); + assert_eq!(embedded["id"], json!("https://example.com/integer.json")); + assert!(embedded["$id"].is_null()); } #[test] @@ -519,21 +515,13 @@ fn test_bundle_202012_reuses_existing_definitions_container() { "type": "string" }); - let bundled = jsonschema::options() - .with_resource( - 
"https://example.com/ext.json", - Resource::from_contents(external.clone()), - ) - .bundle(&root) + let bundled = try_bundle_with_resources(&root, &[("https://example.com/ext.json", external)]) .expect("bundle failed"); assert!(bundled.get("$defs").is_none(), "unexpected $defs created"); - let definitions = bundled - .get("definitions") - .and_then(Value::as_object) - .expect("missing definitions"); - assert!(definitions.contains_key("localInt")); - assert!(definitions.contains_key("https://example.com/ext.json")); + let definitions = bundled["definitions"].as_object().unwrap(); + assert!(!definitions["localInt"].is_null()); + assert!(!definitions["https://example.com/ext.json"].is_null()); let validator = jsonschema::validator_for(&bundled).expect("bundled compile failed"); assert!(validator.is_valid(&json!({"local": 1, "external": "ok"}))); @@ -550,24 +538,21 @@ fn test_bundle_draft7_keeps_existing_defs_but_adds_definitions_for_resolution() } }); - let bundled = jsonschema::options() - .with_resource( + let bundled = try_bundle_with_resources( + &root, + &[( "https://example.com/ext.json", - Resource::from_contents(json!({ + json!({ "$schema": "http://json-schema.org/draft-07/schema#", "type": "integer" - })), - ) - .bundle(&root) - .expect("bundle failed"); + }), + )], + ) + .expect("bundle failed"); - assert!(bundled.get("$defs").is_some(), "existing $defs should stay"); + assert!(bundled["$defs"].is_object(), "existing $defs should stay"); assert!( - bundled - .get("definitions") - .and_then(Value::as_object) - .and_then(|defs| defs.get("https://example.com/ext.json")) - .is_some(), + !bundled["definitions"]["https://example.com/ext.json"].is_null(), "draft-07 bundles must embed into definitions for resolvability" ); @@ -582,19 +567,8 @@ fn assert_bundle_parity( valid_instances: &[Value], invalid_instances: &[Value], ) { - // Validator from distributed schemas (registered individually) - let mut opts = jsonschema::options(); - for (uri, schema) in resources { 
- opts = opts.with_resource(*uri, Resource::from_contents(schema.clone())); - } - let distributed = opts.build(root).expect("distributed compile failed"); - - // Validator from bundled schema - let mut bundle_opts = jsonschema::options(); - for (uri, schema) in resources { - bundle_opts = bundle_opts.with_resource(*uri, Resource::from_contents(schema.clone())); - } - let bundled = bundle_opts.bundle(root).expect("bundle failed"); + let distributed = validator_with_resources(root, resources); + let bundled = try_bundle_with_resources(root, resources).expect("bundle failed"); let bundled_validator = jsonschema::validator_for(&bundled).expect("bundled compile failed"); for instance in valid_instances { @@ -719,26 +693,20 @@ fn test_parity_merge_with_existing_defs() { ); } -/// Walk recurses into embedded schemas; an unresolvable $ref inside one must propagate. +/// Missing refs reachable from the shared registry fail during registry preparation. #[test] -fn test_bundle_error_propagates_from_recursive_walk() { +fn test_registry_prepare_error_propagates_for_missing_transitive_ref() { // `middle` is registered, but it references `leaf` which is not registered. - // The walk recurses into `middle` and fails when resolving `leaf`. + // Preparation should fail before bundling starts. let middle = json!({ "$id": "https://example.com/middle.json", "$schema": "https://json-schema.org/draft/2020-12/schema", "$ref": "https://example.com/leaf.json" }); - let root = json!({ - "$schema": "https://json-schema.org/draft/2020-12/schema", - "$ref": "https://example.com/middle.json" - }); - let result = jsonschema::options() - .with_resource( - "https://example.com/middle.json", - Resource::from_contents(middle), - ) - .bundle(&root); + let result = jsonschema::Registry::new() + .add("https://example.com/middle.json", middle) + .expect("resource should be accepted") + .prepare(); assert!( matches!(result, Err(ReferencingError::Unretrievable { .. 
})), "expected Unretrievable, got: {result:?}" diff --git a/fuzz/fuzz_targets/referencing.rs b/fuzz/fuzz_targets/referencing.rs index 863fd1f8..e35f0a02 100644 --- a/fuzz/fuzz_targets/referencing.rs +++ b/fuzz/fuzz_targets/referencing.rs @@ -1,6 +1,6 @@ #![no_main] use libfuzzer_sys::fuzz_target; -use referencing::{Draft, Registry}; +use referencing::{uri, Draft, Registry, RegistryBuilder}; fuzz_target!(|data: (&[u8], &[u8], &[u8])| { let (schema, base, reference) = data; @@ -14,11 +14,14 @@ fuzz_target!(|data: (&[u8], &[u8], &[u8])| { Draft::Draft201909, Draft::Draft202012, ] { - let resource = draft.create_resource(schema.clone()); - if let Ok(registry) = Registry::try_new(base, resource) { - if let Ok(resolver) = - registry.try_resolver("http://example.com/schema.json") - { + if let Ok(registry) = Registry::new() + .draft(draft) + .add(base, &schema) + .and_then(RegistryBuilder::prepare) + { + if let Ok(index) = registry.build_index() { + let resolver = index + .resolver(uri::from_str("http://example.com/schema.json").unwrap()); let _resolved = resolver.lookup(reference); } } diff --git a/profiler/Cargo.toml b/profiler/Cargo.toml index f3441d66..5d465b2b 100644 --- a/profiler/Cargo.toml +++ b/profiler/Cargo.toml @@ -19,3 +19,7 @@ dhat-heap = [] [profile.release] debug = true + +[profile.profiling] +inherits = "release" +debug = true diff --git a/profiler/Justfile b/profiler/Justfile index ca7ac5db..ca224e6e 100644 --- a/profiler/Justfile +++ b/profiler/Justfile @@ -31,7 +31,7 @@ flame-custom method schema instance iterations="10000": @echo "Opening custom-{{method}}.svg in browser..." 
@xdg-open custom-{{method}}.svg 2>/dev/null || open custom-{{method}}.svg 2>/dev/null || echo "Please open custom-{{method}}.svg manually" -# Profile with custom schema only (no instance needed for build/registry methods) +# Profile with custom schema only (no instance needed for compile method) flame-schema method schema iterations="10000": CARGO_PROFILE_BENCH_DEBUG=true cargo flamegraph \ -o schema-{{method}}.svg \ @@ -80,6 +80,7 @@ fast-invalid-iter-errors: (flame "fast-invalid" "iter_errors" "10000") fast-invalid-evaluate: (flame "fast-invalid" "evaluate" "10000") registry: (flame "citm" "registry" "1000") +fhir-build: (flame "fhir" "build" "500") dhat-citm-build: (dhat "citm" "build" "10000") dhat-citm-is-valid: (dhat "citm" "is_valid" "10000") diff --git a/profiler/src/main.rs b/profiler/src/main.rs index 7ddfaa13..e36606d1 100644 --- a/profiler/src/main.rs +++ b/profiler/src/main.rs @@ -1,4 +1,5 @@ -use referencing::{Draft, Registry}; +use jsonschema::Registry; +use referencing::SPECIFICATIONS; use serde_json::Value; use std::fs; @@ -26,7 +27,8 @@ fn main() -> Result<(), Box> { "citm" => ("../crates/benchmark/data/citm_catalog_schema.json".to_string(), Some("../crates/benchmark/data/citm_catalog.json".to_string())), "fast-valid" => ("../crates/benchmark/data/fast_schema.json".to_string(), Some("../crates/benchmark/data/fast_valid.json".to_string())), "fast-invalid" => ("../crates/benchmark/data/fast_schema.json".to_string(), Some("../crates/benchmark/data/fast_invalid.json".to_string())), - _ => return Err(format!("Unknown preset: {}. Available: openapi, swagger, geojson, citm, fast-valid, fast-invalid", preset).into()), + "fhir" => ("../crates/benchmark/data/fhir.schema.json".to_string(), None), + _ => return Err(format!("Unknown preset: {}. 
Available: openapi, swagger, geojson, citm, fast-valid, fast-invalid, fhir", preset).into()), } } else { let schema_path = pico_args @@ -52,28 +54,25 @@ fn main() -> Result<(), Box> { let schema_str = fs::read_to_string(&args.schema_path)?; let schema: Value = serde_json::from_str(&schema_str)?; + // To initialise metaschema validators + let _ = &*SPECIFICATIONS; + #[cfg(feature = "dhat-heap")] let _profiler = dhat::Profiler::new_heap(); match args.method.as_str() { "build" => { - if args.instance_path.is_some() { - return Err("--instance is not supported for 'build' method".into()); - } for _ in 0..args.iterations { let _ = jsonschema::validator_for(&schema)?; } } "registry" => { - if args.instance_path.is_some() { - return Err("--instance is not supported for 'registry' method".into()); - } for _ in 0..args.iterations { - let input_resources = vec![( - "http://example.com/schema", - Draft::Draft202012.create_resource(schema.clone()), - )]; - let _ = Registry::try_from_resources(input_resources.into_iter()); + let _ = Registry::new() + .extend([("http://example.com/schema", &schema)]) + .expect("Invalid resource") + .prepare() + .expect("Failed to build registry"); } } "is_valid" | "validate" | "iter_errors" | "evaluate" => { @@ -116,7 +115,8 @@ fn main() -> Result<(), Box> { } _ => { return Err( - "Invalid method. Use 'registry', 'build', 'is_valid', 'validate', 'iter_errors', or 'evaluate'".into() + "Invalid method. 
Use 'registry', 'build', 'is_valid', 'validate', 'iter_errors', or 'evaluate'" + .into(), ); } } From e3202fbfb9ef4c2b58ebe04c742b63667c9d6523 Mon Sep 17 00:00:00 2001 From: Dmitry Dygalo Date: Tue, 17 Mar 2026 11:10:26 +0100 Subject: [PATCH 02/14] wip Signed-off-by: Dmitry Dygalo --- crates/jsonschema-referencing/src/registry.rs | 93 +++++++------------ fuzz/fuzz_targets/referencing.rs | 8 +- 2 files changed, 39 insertions(+), 62 deletions(-) diff --git a/crates/jsonschema-referencing/src/registry.rs b/crates/jsonschema-referencing/src/registry.rs index efe4c178..b00642f1 100644 --- a/crates/jsonschema-referencing/src/registry.rs +++ b/crates/jsonschema-referencing/src/registry.rs @@ -983,6 +983,15 @@ enum ReferenceKind { /// Local `$ref`s are always resolved against the document root. type QueueEntry = (Arc>, Arc>, String, Draft); +/// A deferred local `$ref` target. +/// +/// Like [`QueueEntry`] but carries the pre-resolved value address (`value_addr`) obtained +/// for free during the `pointer()` call at push time. Used in [`process_deferred_refs`] to +/// skip already-visited targets without a second `pointer()` traversal. +/// +/// `(base_uri, document_root_uri, pointer, draft, value_addr)` +type DeferredRef = (Arc>, Arc>, String, Draft, usize); + fn insert_borrowed_anchor_entries<'a>( index_data: &mut PreparedIndex<'a>, uri: &Arc>, @@ -1098,6 +1107,8 @@ fn insert_owned_discovered_index_entries<'a>( struct ProcessingState<'a> { queue: VecDeque, seen: ReferenceTracker, + // The String is the original reference text (e.g. "./foo.json"), kept solely for + // `json-schema://`-scheme error messages where the resolved URI is not user-friendly. external: AHashSet<(String, Uri, ReferenceKind)>, scratch: String, refers_metaschemas: bool, @@ -1108,9 +1119,10 @@ struct ProcessingState<'a> { /// Deferred local-ref targets. 
During the main traversal, instead of calling /// `collect_external_resources_recursive` immediately when a local `$ref` is found, /// the target is pushed here. After `process_queue` completes (full document traversal), - /// subresource targets are already in `visited_schemas` and return in O(1); - /// non-subresource paths (e.g. `#/components/schemas/Foo`) are still fully traversed. - deferred_refs: Vec, + /// subresource targets are already in `visited_schemas` and skipped in O(1) via the + /// pre-stored value address; non-subresource paths (e.g. `#/components/schemas/Foo`) + /// are still fully traversed. + deferred_refs: Vec, index_data: PreparedIndex<'a>, } @@ -1717,6 +1729,8 @@ async fn run_async_processing_loop<'a>( fn handle_retrieve_error( uri: &Uri, + // The original reference string is used in error messages for `json-schema://` URIs + // where the resolved URI is not user-friendly (e.g. "./foo.json" vs "json-schema:///foo.json"). original: &str, fragmentless: &Uri, error: Box, @@ -1768,7 +1782,7 @@ fn collect_external_resources<'doc>( refers_metaschemas: &mut bool, draft: Draft, doc_key: &Arc>, - deferred_refs: &mut Vec, + deferred_refs: &mut Vec, local_seen: &mut LocalSeen<'doc>, ) -> Result<(), Error> { if base.scheme().as_str() == "urn" { @@ -1787,19 +1801,16 @@ fn collect_external_resources<'doc>( } else if $reference != "#" { if $reference.starts_with('#') { if mark_local_reference(local_seen, base, $reference) { - if let Some((referenced, resolved_base)) = pointer_with_base( - root, - $reference.trim_start_matches('#'), - base, - resolution_cache, - draft, - )? 
{ + let ptr = $reference.trim_start_matches('#'); + if let Some(referenced) = pointer(root, ptr) { let target_draft = draft.detect(referenced); + let value_addr = std::ptr::from_ref::(referenced) as usize; deferred_refs.push(( - resolved_base, + Arc::clone(base), Arc::clone(doc_key), - $reference.trim_start_matches('#').to_string(), + ptr.to_string(), target_draft, + value_addr, )); } } @@ -1879,7 +1890,7 @@ fn collect_external_resources_recursive<'doc>( draft: Draft, visited: &mut AHashSet, doc_key: &Arc>, - deferred_refs: &mut Vec, + deferred_refs: &mut Vec, local_seen: &mut LocalSeen<'doc>, ) -> Result<(), Error> { let ptr = std::ptr::from_ref::(contents) as usize; @@ -1931,7 +1942,9 @@ fn collect_external_resources_recursive<'doc>( /// Process deferred local-ref targets collected during the main traversal. /// /// Called after `process_queue` finishes so that all subresource nodes are already in -/// `visited_schemas`. Subresource targets return in O(1); non-subresource targets +/// `visited_schemas`. Targets that were visited by the main BFS (e.g. `#/definitions/Foo` +/// under a JSON Schema keyword) are skipped in O(1) via the pre-stored value address, +/// avoiding a redundant `pointer()` traversal. Non-subresource targets /// (e.g. `#/components/schemas/Foo`) are still fully traversed. New deferred entries /// added during traversal are also processed iteratively until none remain. fn process_deferred_refs<'a>( @@ -1942,7 +1955,14 @@ fn process_deferred_refs<'a>( ) -> Result<(), Error> { while !state.deferred_refs.is_empty() { let batch = std::mem::take(&mut state.deferred_refs); - for (base, doc_key, pointer_path, draft) in batch { + for (base, doc_key, pointer_path, draft, value_addr) in batch { + // Fast path: if this target was already visited by the main BFS traversal + // (e.g. 
a `#/definitions/Foo` that `walk_subresources_with_path` descended into), + // all its subresources were processed and `collect_external_resources` was already + // called on each — skip without a redundant `pointer()` traversal. + if state.visited_schemas.contains(&value_addr) { + continue; + } let Some(document) = documents.get(&doc_key) else { continue; }; @@ -2023,47 +2043,6 @@ pub fn pointer<'a>(document: &'a Value, pointer: &str) -> Option<&'a Value> { ) } -#[allow(clippy::type_complexity)] -fn pointer_with_base<'a>( - document: &'a Value, - pointer: &str, - base: &Arc>, - resolution_cache: &mut UriCache, - draft: Draft, -) -> Result>)>, Error> { - if pointer.is_empty() { - return Ok(Some((document, Arc::clone(base)))); - } - if !pointer.starts_with('/') { - return Ok(None); - } - - let mut current = document; - let mut current_base = Arc::clone(base); - let mut current_draft = draft; - - for token in pointer.split('/').skip(1).map(unescape_segment) { - current_draft = current_draft.detect(current); - if let Some(id) = current_draft.id_of(current) { - current_base = resolve_id(¤t_base, id, resolution_cache)?; - } - - current = match current { - Value::Object(map) => match map.get(&*token) { - Some(v) => v, - None => return Ok(None), - }, - Value::Array(list) => match parse_index(&token).and_then(|x| list.get(x)) { - Some(v) => v, - None => return Ok(None), - }, - _ => return Ok(None), - }; - } - - Ok(Some((current, current_base))) -} - // Taken from `serde_json`. 
#[must_use] pub fn parse_index(s: &str) -> Option { diff --git a/fuzz/fuzz_targets/referencing.rs b/fuzz/fuzz_targets/referencing.rs index e35f0a02..33491665 100644 --- a/fuzz/fuzz_targets/referencing.rs +++ b/fuzz/fuzz_targets/referencing.rs @@ -19,11 +19,9 @@ fuzz_target!(|data: (&[u8], &[u8], &[u8])| { .add(base, &schema) .and_then(RegistryBuilder::prepare) { - if let Ok(index) = registry.build_index() { - let resolver = index - .resolver(uri::from_str("http://example.com/schema.json").unwrap()); - let _resolved = resolver.lookup(reference); - } + let resolver = registry + .resolver(uri::from_str("http://example.com/schema.json").unwrap()); + let _resolved = resolver.lookup(reference); } } } From b3cc7e5d851c2d8282bfa517143a4b00b2c7b219 Mon Sep 17 00:00:00 2001 From: Dmitry Dygalo Date: Tue, 17 Mar 2026 22:27:28 +0100 Subject: [PATCH 03/14] wip Signed-off-by: Dmitry Dygalo --- crates/jsonschema-referencing/Cargo.toml | 1 + .../benches/registry.rs | 35 + crates/jsonschema-referencing/src/lib.rs | 26 + crates/jsonschema-referencing/src/path.rs | 132 +++ crates/jsonschema-referencing/src/registry.rs | 898 +++++++++++++----- crates/jsonschema-referencing/src/resource.rs | 90 +- .../src/specification/draft201909.rs | 109 ++- .../src/specification/draft4.rs | 293 +++++- .../src/specification/draft6.rs | 101 +- .../src/specification/draft7.rs | 106 ++- .../src/specification/mod.rs | 81 +- .../src/specification/subresources.rs | 131 ++- crates/jsonschema/src/paths.rs | 106 +-- profiler/Cargo.toml | 1 + profiler/Justfile | 4 + 15 files changed, 1558 insertions(+), 556 deletions(-) diff --git a/crates/jsonschema-referencing/Cargo.toml b/crates/jsonschema-referencing/Cargo.toml index e1769f5e..62eda696 100644 --- a/crates/jsonschema-referencing/Cargo.toml +++ b/crates/jsonschema-referencing/Cargo.toml @@ -12,6 +12,7 @@ license.workspace = true [features] default = [] retrieve-async = ["dep:async-trait", "dep:futures"] +perf-observe-registry = [] [target.'cfg(target_arch = 
"wasm32")'.dependencies] getrandom = { version = "0.3.4", features = ["wasm_js"] } diff --git a/crates/jsonschema-referencing/benches/registry.rs b/crates/jsonschema-referencing/benches/registry.rs index 308aa092..2d114af0 100644 --- a/crates/jsonschema-referencing/benches/registry.rs +++ b/crates/jsonschema-referencing/benches/registry.rs @@ -35,6 +35,24 @@ fn bench_subresources(c: &mut Criterion) { BatchSize::SmallInput, ); }); + + // Owned cases force the registry to store persistent pointers for discovered entries. + group.bench_with_input( + BenchmarkId::new("prepare_owned", name), + &schema, + |b, schema| { + b.iter_batched( + || draft.create_resource(schema.clone()), + |resource| { + Registry::new() + .add("http://example.com/schema.json", resource) + .expect("Invalid registry input") + .prepare() + }, + BatchSize::SmallInput, + ); + }, + ); } let drafts = [ (Draft::Draft4, benchmark::GEOJSON, "GeoJSON"), @@ -63,6 +81,23 @@ fn bench_subresources(c: &mut Criterion) { ); }, ); + + group.bench_with_input( + BenchmarkId::new("prepare_owned_with_specifications", name), + &schema, + |b, schema| { + b.iter_batched( + || (draft.create_resource(schema.clone()), &*SPECIFICATIONS), + |(resource, registry)| { + registry + .add("http://example.com/schema.json", resource) + .expect("Invalid registry input") + .prepare() + }, + BatchSize::SmallInput, + ); + }, + ); } group.finish(); diff --git a/crates/jsonschema-referencing/src/lib.rs b/crates/jsonschema-referencing/src/lib.rs index 02b78d77..a9c52361 100644 --- a/crates/jsonschema-referencing/src/lib.rs +++ b/crates/jsonschema-referencing/src/lib.rs @@ -1,6 +1,16 @@ //! # referencing //! //! An implementation-agnostic JSON reference resolution library for Rust. +#[macro_export] +macro_rules! 
observe_registry { + ($($arg:tt)*) => {{ + #[cfg(feature = "perf-observe-registry")] + { + println!($($arg)*); + } + }}; +} + mod anchors; mod cache; mod error; @@ -23,6 +33,7 @@ pub use fluent_uri::{Iri, IriRef, Uri, UriRef}; pub use list::List; #[doc(hidden)] pub use path::{write_escaped_str, write_index}; +pub use path::{JsonPointerNode, JsonPointerSegment, OwnedJsonPointer}; pub use registry::{ parse_index, pointer, IntoRegistryResource, Registry, RegistryBuilder, SPECIFICATIONS, }; @@ -35,3 +46,18 @@ pub use vocabularies::{Vocabulary, VocabularySet}; #[cfg(feature = "retrieve-async")] pub use retriever::AsyncRetrieve; + +#[cfg(test)] +mod tests { + use crate::{JsonPointerNode, OwnedJsonPointer}; + + #[test] + fn test_json_pointer_types_are_exported_from_crate_root() { + let root = JsonPointerNode::new(); + let child = root.push(1usize); + + let pointer = OwnedJsonPointer::from(&child); + + assert_eq!(pointer.as_str(), "/1"); + } +} diff --git a/crates/jsonschema-referencing/src/path.rs b/crates/jsonschema-referencing/src/path.rs index 036306f9..6d9c7c81 100644 --- a/crates/jsonschema-referencing/src/path.rs +++ b/crates/jsonschema-referencing/src/path.rs @@ -1,3 +1,110 @@ +use std::{borrow::Cow, sync::Arc}; + +#[derive(Clone, Debug, Eq, PartialEq)] +pub enum JsonPointerSegment<'a> { + Key(Cow<'a, str>), + Index(usize), +} + +impl From for JsonPointerSegment<'_> { + fn from(value: usize) -> Self { + Self::Index(value) + } +} + +impl<'a> From<&'a str> for JsonPointerSegment<'a> { + fn from(value: &'a str) -> Self { + Self::Key(Cow::Borrowed(value)) + } +} + +impl<'a> From<&'a String> for JsonPointerSegment<'a> { + fn from(value: &'a String) -> Self { + Self::Key(Cow::Borrowed(value)) + } +} + +impl<'a> From> for JsonPointerSegment<'a> { + fn from(value: Cow<'a, str>) -> Self { + Self::Key(value) + } +} + +#[derive(Debug, Clone, Eq, PartialEq)] +pub struct JsonPointerNode<'a, 'b> { + segment: JsonPointerSegment<'a>, + parent: Option<&'b JsonPointerNode<'b, 'a>>, 
+} + +impl Default for JsonPointerNode<'_, '_> { + fn default() -> Self { + Self::new() + } +} + +impl JsonPointerNode<'_, '_> { + #[must_use] + pub const fn new() -> Self { + Self { + segment: JsonPointerSegment::Index(0), + parent: None, + } + } +} + +impl<'a, 'b> JsonPointerNode<'a, 'b> { + #[must_use] + pub fn push<'next>( + &'next self, + segment: impl Into>, + ) -> JsonPointerNode<'a, 'next> { + JsonPointerNode { + segment: segment.into(), + parent: Some(self), + } + } + #[must_use] + pub const fn segment(&self) -> &JsonPointerSegment<'a> { + &self.segment + } + + #[must_use] + pub const fn parent(&self) -> Option<&'b JsonPointerNode<'b, 'a>> { + self.parent + } +} + +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct OwnedJsonPointer(Arc); + +impl OwnedJsonPointer { + #[must_use] + pub fn as_str(&self) -> &str { + &self.0 + } +} + +impl<'a> From<&'a JsonPointerNode<'_, 'a>> for OwnedJsonPointer { + fn from(value: &'a JsonPointerNode<'_, 'a>) -> Self { + let mut segments = Vec::new(); + let mut head = value; + while let Some(parent) = head.parent { + segments.push(&head.segment); + head = parent; + } + + let mut buffer = String::new(); + for segment in segments.iter().rev() { + buffer.push('/'); + match segment { + JsonPointerSegment::Key(key) => write_escaped_str(&mut buffer, key), + JsonPointerSegment::Index(idx) => write_index(&mut buffer, *idx), + } + } + Self(Arc::from(buffer)) + } +} + /// Escape a key into a JSON Pointer segment: `~` → `~0`, `/` → `~1`. /// /// Appends the escaped form of `value` directly to `buffer`. 
@@ -44,3 +151,28 @@ pub fn write_index(buffer: &mut String, idx: usize) { let mut itoa_buffer = itoa::Buffer::new(); buffer.push_str(itoa_buffer.format(idx)); } + +#[cfg(test)] +mod tests { + use super::{JsonPointerNode, OwnedJsonPointer}; + + #[test] + fn test_json_pointer_node_single_index_fast_path() { + let root = JsonPointerNode::new(); + let child = root.push(3usize); + + let pointer = OwnedJsonPointer::from(&child); + + assert_eq!(pointer.as_str(), "/3"); + } + + #[test] + fn test_json_pointer_node_escapes_property_names() { + let root = JsonPointerNode::new(); + let child = root.push("foo/bar~baz"); + + let pointer = OwnedJsonPointer::from(&child); + + assert_eq!(pointer.as_str(), "/foo~1bar~0baz"); + } +} diff --git a/crates/jsonschema-referencing/src/registry.rs b/crates/jsonschema-referencing/src/registry.rs index b00642f1..373aca9a 100644 --- a/crates/jsonschema-referencing/src/registry.rs +++ b/crates/jsonschema-referencing/src/registry.rs @@ -13,11 +13,12 @@ use serde_json::Value; use crate::{ cache::{SharedUriCache, UriCache}, meta::{self, metas_for_draft}, - resource::{unescape_segment, PathSegment, PathStack}, + resource::unescape_segment, small_map::SmallMap, uri, vocabularies::{self, VocabularySet}, - Anchor, DefaultRetriever, Draft, Error, Resolver, Resource, ResourceRef, Retrieve, + Anchor, DefaultRetriever, Draft, Error, JsonPointerNode, JsonPointerSegment, Resolver, + Resource, ResourceRef, Retrieve, }; #[derive(Debug)] @@ -179,19 +180,20 @@ impl ParsedPointer { Some(Self { segments }) } - fn from_path_stack(path: &PathStack<'_>) -> Option { - let mut pointer = Self::from_json_pointer(path.base_pointer())?; - for segment in path.segments() { - match segment { - PathSegment::Key(key) => pointer - .segments - .push(ParsedPointerSegment::Key((*key).into())), - PathSegment::Index(index) => { - pointer.segments.push(ParsedPointerSegment::Index(*index)); - } - } + fn from_pointer_node(path: &JsonPointerNode<'_, '_>) -> Self { + let mut segments = 
Vec::new(); + let mut head = path; + + while let Some(parent) = head.parent() { + segments.push(match head.segment() { + JsonPointerSegment::Key(key) => ParsedPointerSegment::Key(key.as_ref().into()), + JsonPointerSegment::Index(idx) => ParsedPointerSegment::Index(*idx), + }); + head = parent; } - Some(pointer) + + segments.reverse(); + Self { segments } } fn lookup<'a>(&self, document: &'a Value) -> Option<&'a Value> { @@ -832,101 +834,44 @@ fn build_prepared_index_for_documents<'a>( documents: &DocumentStore<'a>, resolution_cache: &mut UriCache, ) -> Result, Error> { - let mut index_data = PreparedIndex::default(); + let mut state = ProcessingState::new(); + let mut known_resources = KnownResources::default(); + for (doc_uri, document) in documents { - insert_root_index_entries(&mut index_data, doc_uri, document); - let root = document.contents(); - let borrowed_root = document.borrowed_contents(); - let initial_base = Arc::clone(doc_uri); - // Stack: (base_uri, json_pointer_from_root, draft) - let mut work: Vec<(Arc>, String, Draft)> = - vec![(initial_base, String::new(), document.draft())]; - while let Some((base, ptr_str, draft)) = work.pop() { - let contents = if ptr_str.is_empty() { - root - } else { - match pointer(root, &ptr_str) { - Some(v) => v, - None => continue, - } - }; - let original_base = Arc::clone(&base); - let mut current_base = base; - let (id, has_anchors) = draft.id_and_has_anchors(contents); - let is_root_entry = ptr_str.is_empty(); - let Some(parsed_pointer) = ParsedPointer::from_json_pointer(&ptr_str) else { - continue; - }; - if let Some(id) = id { - current_base = resolve_id(¤t_base, id, resolution_cache)?; - let insert_resource = current_base != original_base; - if !(is_root_entry && current_base == *doc_uri) && (insert_resource || has_anchors) - { - if let Some(root) = borrowed_root { - insert_borrowed_discovered_index_entries( - &mut index_data, - ¤t_base, - draft, - insert_resource, - if ptr_str.is_empty() { - root - } else { - 
pointer(root, &ptr_str) - .expect("borrowed root contents were already resolved") - }, - ); - } else { - insert_owned_discovered_index_entries( - &mut index_data, - ¤t_base, - document, - &parsed_pointer, - draft, - insert_resource, - contents, - ); - } - } - } else if has_anchors && !is_root_entry { - if let Some(root) = borrowed_root { - insert_borrowed_discovered_index_entries( - &mut index_data, - ¤t_base, - draft, - false, - if ptr_str.is_empty() { - root - } else { - pointer(root, &ptr_str) - .expect("borrowed root contents were already resolved") - }, - ); - } else { - insert_owned_discovered_index_entries( - &mut index_data, - ¤t_base, - document, - &parsed_pointer, - draft, - false, - contents, - ); - } - } - // Push children with their absolute paths - let base_for_children = Arc::clone(¤t_base); - let mut path = PathStack::from_base(ptr_str); - let _ = draft.walk_subresources_with_path( - contents, - &mut path, - &mut |p, _child, child_draft| { - work.push((Arc::clone(&base_for_children), p.to_pointer(), child_draft)); - Ok::<(), Error>(()) - }, - ); + known_resources.insert((**doc_uri).clone()); + insert_root_index_entries(&mut state.index_data, doc_uri, document); + } + + for (doc_uri, document) in documents { + if document.borrowed_contents().is_some() { + let mut local_seen = LocalSeen::new(); + process_borrowed_document( + Arc::clone(doc_uri), + doc_uri, + document, + "", + document.draft(), + &mut state, + &mut known_resources, + resolution_cache, + &mut local_seen, + )?; + } else { + let mut local_seen = LocalSeen::new(); + process_owned_document( + Arc::clone(doc_uri), + doc_uri, + document, + "", + document.draft(), + &mut state, + &mut known_resources, + resolution_cache, + &mut local_seen, + )?; } } - Ok(index_data) + Ok(state.index_data) } type KnownResources = AHashSet>; @@ -1123,6 +1068,8 @@ struct ProcessingState<'a> { /// pre-stored value address; non-subresource paths (e.g. `#/components/schemas/Foo`) /// are still fully traversed. 
deferred_refs: Vec, + draft4_reference_scratch: Vec<(&'a str, &'static str)>, + draft4_child_scratch: Vec<(&'a Value, Draft)>, index_data: PreparedIndex<'a>, } @@ -1137,6 +1084,8 @@ impl ProcessingState<'_> { custom_metaschemas: Vec::new(), visited_schemas: AHashSet::new(), deferred_refs: Vec::new(), + draft4_reference_scratch: Vec::new(), + draft4_child_scratch: Vec::new(), index_data: PreparedIndex::default(), } } @@ -1204,167 +1153,481 @@ fn process_input_resources_mixed<'a>( } } -fn process_queue<'a, 'r>( +fn process_queue<'r>( state: &mut ProcessingState<'r>, - documents: &'a DocumentStore<'r>, + documents: &DocumentStore<'r>, known_resources: &mut KnownResources, resolution_cache: &mut UriCache, - local_seen: &mut LocalSeen<'a>, ) -> Result<(), Error> { while let Some((base, document_root_uri, pointer_path, draft)) = state.queue.pop_front() { let Some(document) = documents.get(&document_root_uri) else { continue; }; - let root = document.contents(); - let borrowed_root = document.borrowed_contents(); - let borrowed_contents = if pointer_path.is_empty() { - borrowed_root - } else { - borrowed_root.and_then(|root| pointer(root, &pointer_path)) - }; - let Some(contents) = (if pointer_path.is_empty() { - Some(root) - } else { - pointer(root, &pointer_path) - }) else { + if document.borrowed_contents().is_some() { + let mut document_local_seen = LocalSeen::new(); + process_borrowed_document( + base, + &document_root_uri, + document, + &pointer_path, + draft, + state, + known_resources, + resolution_cache, + &mut document_local_seen, + )?; continue; - }; - - let resource = ResourceRef::new(contents, draft); - let mut path = PathStack::from_base(pointer_path); - process_resource_tree( + } + let mut document_local_seen = LocalSeen::new(); + process_owned_document( base, - root, - borrowed_contents, - resource, - &mut path, &document_root_uri, document, + &pointer_path, + draft, state, known_resources, resolution_cache, - local_seen, + &mut document_local_seen, )?; } 
Ok(()) } -fn process_resource_tree<'a, 'r>( - mut base: Arc>, - root: &'a Value, - borrowed_contents: Option<&'r Value>, - resource: ResourceRef<'a>, - path: &mut PathStack<'a>, - doc_key: &Arc>, +fn process_borrowed_document<'r>( + current_base_uri: Arc>, + document_root_uri: &Arc>, document: &Arc>, + pointer_path: &str, + draft: Draft, state: &mut ProcessingState<'r>, known_resources: &mut KnownResources, resolution_cache: &mut UriCache, - local_seen: &mut LocalSeen<'a>, + local_seen: &mut LocalSeen<'r>, ) -> Result<(), Error> { - let (id, has_anchors) = resource.draft().id_and_has_anchors(resource.contents()); - let is_root_entry = path.base_pointer().is_empty() && path.segments().is_empty(); - if let Some(id) = id { - let original_base = Arc::clone(&base); - base = resolve_id(&base, id, resolution_cache)?; - known_resources.insert((*base).clone()); - let insert_resource = base != original_base; - if is_root_entry && base == *doc_key { - // Root resource / anchors were already inserted under the storage URI. 
- } else if let Some(contents) = borrowed_contents { - insert_borrowed_discovered_index_entries( - &mut state.index_data, - &base, - resource.draft(), - insert_resource, - contents, - ); - } else if insert_resource || has_anchors { - if let Some(pointer) = ParsedPointer::from_path_stack(path) { - insert_owned_discovered_index_entries( + let Some(document_root) = document.borrowed_contents() else { + return Ok(()); + }; + let Some(subschema) = (if pointer_path.is_empty() { + Some(document_root) + } else { + pointer(document_root, pointer_path) + }) else { + return Ok(()); + }; + + explore_borrowed_subtree( + current_base_uri, + document_root, + subschema, + draft, + pointer_path.is_empty(), + document_root_uri, + state, + known_resources, + resolution_cache, + local_seen, + ) +} + +fn explore_borrowed_subtree<'r>( + mut current_base_uri: Arc>, + document_root: &'r Value, + subschema: &'r Value, + draft: Draft, + is_root_entry: bool, + document_root_uri: &Arc>, + state: &mut ProcessingState<'r>, + known_resources: &mut KnownResources, + resolution_cache: &mut UriCache, + local_seen: &mut LocalSeen<'r>, +) -> Result<(), Error> { + #[cfg(feature = "perf-observe-registry")] + if let Some(object) = subschema.as_object() { + crate::observe_registry!("registry.borrowed.object_len={}", object.len()); + } + if draft == Draft::Draft4 { + let Some(probe) = draft.probe_draft4_borrowed_object(subschema) else { + return Ok(()); + }; + #[cfg(feature = "perf-observe-registry")] + { + let id_scan = match (probe.id.is_some(), probe.has_anchor) { + (false, false) => "none", + (true, false) => "id_only", + (false, true) => "anchor_only", + (true, true) => "id_and_anchor", + }; + crate::observe_registry!("registry.id_scan={id_scan}"); + } + if let Some(id) = probe.id { + let original_base_uri = Arc::clone(¤t_base_uri); + current_base_uri = resolve_id(¤t_base_uri, id, resolution_cache)?; + known_resources.insert((*current_base_uri).clone()); + let insert_resource = current_base_uri != 
original_base_uri; + if !(is_root_entry && current_base_uri == *document_root_uri) { + insert_borrowed_discovered_index_entries( &mut state.index_data, - &base, - document, - &pointer, - resource.draft(), + ¤t_base_uri, + draft, insert_resource, - resource.contents(), + subschema, ); } - } - } else if has_anchors { - if is_root_entry { - // Root anchors were already inserted under the storage URI. - } else if let Some(contents) = borrowed_contents { + } else if probe.has_anchor && !is_root_entry { insert_borrowed_discovered_index_entries( &mut state.index_data, - &base, - resource.draft(), + ¤t_base_uri, + draft, false, - contents, + subschema, ); - } else if let Some(pointer) = ParsedPointer::from_path_stack(path) { + } + + let subschema_ptr = std::ptr::from_ref::(subschema) as usize; + if state.visited_schemas.insert(subschema_ptr) + && probe.has_ref_or_schema { + let ref_start = state.draft4_reference_scratch.len(); + let child_start = state.draft4_child_scratch.len(); + draft + .scan_draft4_borrowed_object_into_scratch( + subschema, + &mut state.draft4_reference_scratch, + &mut state.draft4_child_scratch, + ) + .expect("draft4 scan should always see an object here"); + let ref_end = state.draft4_reference_scratch.len(); + let child_end = state.draft4_child_scratch.len(); + + for idx in ref_start..ref_end { + let (reference, key) = state.draft4_reference_scratch[idx]; + if reference.starts_with("https://json-schema.org/draft/") + || reference.starts_with("http://json-schema.org/draft-") + || current_base_uri + .as_str() + .starts_with("https://json-schema.org/draft/") + { + if key == "$ref" { + state.refers_metaschemas = true; + } + continue; + } + if reference == "#" { + continue; + } + if reference.starts_with('#') { + if mark_local_reference(local_seen, ¤t_base_uri, reference) { + let ptr = reference.trim_start_matches('#'); + if let Some(referenced) = pointer(document_root, ptr) { + let target_draft = draft.detect(referenced); + let value_addr = 
std::ptr::from_ref::(referenced) as usize; + state.deferred_refs.push(( + Arc::clone(¤t_base_uri), + Arc::clone(document_root_uri), + ptr.to_string(), + target_draft, + value_addr, + )); + } + } + continue; + } + if mark_reference(&mut state.seen, ¤t_base_uri, reference) { + let resolved = if current_base_uri.has_fragment() { + let mut base_without_fragment = current_base_uri.as_ref().clone(); + base_without_fragment.set_fragment(None); + + let (path, fragment) = match reference.split_once('#') { + Some((path, fragment)) => (path, Some(fragment)), + None => (reference, None), + }; + + let mut resolved = (*resolution_cache + .resolve_against(&base_without_fragment.borrow(), path)?) + .clone(); + if let Some(fragment) = fragment { + if let Some(encoded) = uri::EncodedString::new(fragment) { + resolved = resolved.with_fragment(Some(encoded)); + } else { + uri::encode_to(fragment, &mut state.scratch); + resolved = resolved.with_fragment(Some( + uri::EncodedString::new_or_panic(&state.scratch), + )); + state.scratch.clear(); + } + } + resolved + } else { + (*resolution_cache + .resolve_against(¤t_base_uri.borrow(), reference)?) 
+ .clone() + }; + + let kind = if key == "$schema" { + ReferenceKind::Schema + } else { + ReferenceKind::Ref + }; + state + .external + .insert((reference.to_string(), resolved, kind)); + } + } + + let mut idx = child_start; + while idx < child_end { + let (child, child_draft) = state.draft4_child_scratch[idx]; + idx += 1; + explore_borrowed_subtree( + Arc::clone(¤t_base_uri), + document_root, + child, + child_draft, + false, + document_root_uri, + state, + known_resources, + resolution_cache, + local_seen, + )?; + } + + state.draft4_reference_scratch.truncate(ref_start); + state.draft4_child_scratch.truncate(child_start); + return Ok(()); + } + draft.walk_borrowed_subresources(subschema, &mut |child, child_draft| { + explore_borrowed_subtree( + Arc::clone(¤t_base_uri), + document_root, + child, + child_draft, + false, + document_root_uri, + state, + known_resources, + resolution_cache, + local_seen, + ) + })?; + return Ok(()); + } + let (id, has_anchors) = draft.id_and_has_anchors(subschema); + #[cfg(feature = "perf-observe-registry")] + { + let id_scan = match (id.is_some(), has_anchors) { + (false, false) => "none", + (true, false) => "id_only", + (false, true) => "anchor_only", + (true, true) => "id_and_anchor", + }; + crate::observe_registry!("registry.id_scan={id_scan}"); + } + if let Some(id) = id { + let original_base_uri = Arc::clone(¤t_base_uri); + current_base_uri = resolve_id(¤t_base_uri, id, resolution_cache)?; + known_resources.insert((*current_base_uri).clone()); + let insert_resource = current_base_uri != original_base_uri; + if !(is_root_entry && current_base_uri == *document_root_uri) { + insert_borrowed_discovered_index_entries( + &mut state.index_data, + ¤t_base_uri, + draft, + insert_resource, + subschema, + ); + } + } else if has_anchors && !is_root_entry { + insert_borrowed_discovered_index_entries( + &mut state.index_data, + ¤t_base_uri, + draft, + false, + subschema, + ); + } + + let subschema_ptr = std::ptr::from_ref::(subschema) as usize; 
+ if state.visited_schemas.insert(subschema_ptr) { + collect_external_resources( + ¤t_base_uri, + document_root, + subschema, + &mut state.external, + &mut state.seen, + resolution_cache, + &mut state.scratch, + &mut state.refers_metaschemas, + draft, + document_root_uri, + &mut state.deferred_refs, + local_seen, + )?; + } + + draft.walk_borrowed_subresources(subschema, &mut |child, child_draft| { + explore_borrowed_subtree( + Arc::clone(¤t_base_uri), + document_root, + child, + child_draft, + false, + document_root_uri, + state, + known_resources, + resolution_cache, + local_seen, + ) + }) +} + +fn process_owned_document<'a, 'r>( + current_base_uri: Arc>, + document_root_uri: &Arc>, + document: &'a Arc>, + pointer_path: &str, + draft: Draft, + state: &mut ProcessingState<'r>, + known_resources: &mut KnownResources, + resolution_cache: &mut UriCache, + local_seen: &mut LocalSeen<'a>, +) -> Result<(), Error> { + let document_root = document.contents(); + let Some(subschema) = (if pointer_path.is_empty() { + Some(document_root) + } else { + pointer(document_root, pointer_path) + }) else { + return Ok(()); + }; + let parsed_pointer = ParsedPointer::from_json_pointer(pointer_path); + + with_pointer_node_from_parsed(parsed_pointer.as_ref(), |path| { + explore_owned_subtree( + current_base_uri, + document_root, + subschema, + draft, + pointer_path.is_empty(), + path, + document_root_uri, + document, + state, + known_resources, + resolution_cache, + local_seen, + ) + }) +} + +fn with_pointer_node_from_parsed( + pointer: Option<&ParsedPointer>, + f: impl FnOnce(&JsonPointerNode<'_, '_>) -> R, +) -> R { + fn descend<'a, 'node, R>( + segments: &'a [ParsedPointerSegment], + current: &'node JsonPointerNode<'a, 'node>, + f: impl FnOnce(&JsonPointerNode<'_, '_>) -> R, + ) -> R { + if let Some((head, tail)) = segments.split_first() { + let next = match head { + ParsedPointerSegment::Key(key) => current.push(key.as_ref()), + ParsedPointerSegment::Index(idx) => current.push(*idx), 
+ }; + descend(tail, &next, f) + } else { + f(current) + } + } + + let root = JsonPointerNode::new(); + match pointer { + Some(pointer) => descend(&pointer.segments, &root, f), + None => f(&root), + } +} + +fn explore_owned_subtree<'a, 'r>( + mut current_base_uri: Arc>, + document_root: &'a Value, + subschema: &'a Value, + draft: Draft, + is_root_entry: bool, + path: &JsonPointerNode<'_, '_>, + document_root_uri: &Arc>, + document: &Arc>, + state: &mut ProcessingState<'r>, + known_resources: &mut KnownResources, + resolution_cache: &mut UriCache, + local_seen: &mut LocalSeen<'a>, +) -> Result<(), Error> { + let (id, has_anchors) = draft.id_and_has_anchors(subschema); + if let Some(id) = id { + let original_base_uri = Arc::clone(¤t_base_uri); + current_base_uri = resolve_id(¤t_base_uri, id, resolution_cache)?; + known_resources.insert((*current_base_uri).clone()); + let insert_resource = current_base_uri != original_base_uri; + if !(is_root_entry && current_base_uri == *document_root_uri) + && (insert_resource || has_anchors) + { + let pointer = ParsedPointer::from_pointer_node(path); insert_owned_discovered_index_entries( &mut state.index_data, - &base, + ¤t_base_uri, document, &pointer, - resource.draft(), - false, - resource.contents(), + draft, + insert_resource, + subschema, ); } + } else if has_anchors && !is_root_entry { + let pointer = ParsedPointer::from_pointer_node(path); + insert_owned_discovered_index_entries( + &mut state.index_data, + ¤t_base_uri, + document, + &pointer, + draft, + false, + subschema, + ); } - let contents_ptr = std::ptr::from_ref::(resource.contents()) as usize; - if state.visited_schemas.insert(contents_ptr) { + let subschema_ptr = std::ptr::from_ref::(subschema) as usize; + if state.visited_schemas.insert(subschema_ptr) { collect_external_resources( - &base, - root, - resource.contents(), + ¤t_base_uri, + document_root, + subschema, &mut state.external, &mut state.seen, resolution_cache, &mut state.scratch, &mut 
state.refers_metaschemas, - resource.draft(), - doc_key, + draft, + document_root_uri, &mut state.deferred_refs, local_seen, )?; } - resource.draft().walk_subresources_with_path( - resource.contents(), - path, - &mut |child_path, child, child_draft| { - let borrowed_child = - borrowed_contents.and_then(|contents| match child_path.segments().last() { - Some(PathSegment::Key(key)) => match contents { - Value::Object(map) => map.get(*key), - _ => None, - }, - Some(PathSegment::Index(index)) => match contents { - Value::Array(list) => list.get(*index), - _ => None, - }, - None => Some(contents), - }); - process_resource_tree( - Arc::clone(&base), - root, - borrowed_child, - ResourceRef::new(child, child_draft), - child_path, - doc_key, - document, - state, - known_resources, - resolution_cache, - local_seen, - ) - }, - ) + draft.walk_owned_subresources(subschema, path, &mut |child_path, child, child_draft| { + explore_owned_subtree( + Arc::clone(¤t_base_uri), + document_root, + child, + child_draft, + false, + child_path, + document_root_uri, + document, + state, + known_resources, + resolution_cache, + local_seen, + ) + }) } fn enqueue_fragment_entry( @@ -1492,13 +1755,7 @@ fn run_sync_processing_loop<'a>( { // SAFETY: widens 'static → '_ (covariant); set is empty after reuse_local_seen clears it. let mut local_seen: LocalSeen<'_> = unsafe { reuse_local_seen(local_seen_buf) }; - process_queue( - state, - documents, - known_resources, - resolution_cache, - &mut local_seen, - )?; + process_queue(state, documents, known_resources, resolution_cache)?; process_deferred_refs(state, documents, resolution_cache, &mut local_seen)?; // SAFETY: clears all '_ refs before narrowing back to 'static to reclaim the buffer. local_seen_buf = unsafe { reuse_local_seen(local_seen) }; @@ -1544,13 +1801,7 @@ fn run_sync_processing_loop<'a>( if !state.queue.is_empty() { // SAFETY: widens 'static → '_ (covariant); set is empty after reuse_local_seen clears it. 
let mut local_seen: LocalSeen<'_> = unsafe { reuse_local_seen(local_seen_buf) }; - process_queue( - state, - documents, - known_resources, - resolution_cache, - &mut local_seen, - )?; + process_queue(state, documents, known_resources, resolution_cache)?; process_deferred_refs(state, documents, resolution_cache, &mut local_seen)?; } @@ -1638,13 +1889,7 @@ async fn run_async_processing_loop<'a>( { // SAFETY: widens 'static → '_ (covariant); set is empty after reuse_local_seen clears it. let mut local_seen: LocalSeen<'_> = unsafe { reuse_local_seen(local_seen_buf) }; - process_queue( - state, - documents, - known_resources, - resolution_cache, - &mut local_seen, - )?; + process_queue(state, documents, known_resources, resolution_cache)?; process_deferred_refs(state, documents, resolution_cache, &mut local_seen)?; // SAFETY: clears all '_ refs before narrowing back to 'static to reclaim the buffer. local_seen_buf = unsafe { reuse_local_seen(local_seen) }; @@ -1714,13 +1959,7 @@ async fn run_async_processing_loop<'a>( if !state.queue.is_empty() { // SAFETY: widens 'static → '_ (covariant); set is empty after reuse_local_seen clears it. 
let mut local_seen: LocalSeen<'_> = unsafe { reuse_local_seen(local_seen_buf) }; - process_queue( - state, - documents, - known_resources, - resolution_cache, - &mut local_seen, - )?; + process_queue(state, documents, known_resources, resolution_cache)?; process_deferred_refs(state, documents, resolution_cache, &mut local_seen)?; } @@ -1800,7 +2039,9 @@ fn collect_external_resources<'doc>( } } else if $reference != "#" { if $reference.starts_with('#') { - if mark_local_reference(local_seen, base, $reference) { + crate::observe_registry!("registry.local_ref={}", $reference); + if draft == Draft::Draft4 || mark_local_reference(local_seen, base, $reference) + { let ptr = $reference.trim_start_matches('#'); if let Some(referenced) = pointer(root, ptr) { let target_draft = draft.detect(referenced); @@ -1815,6 +2056,11 @@ fn collect_external_resources<'doc>( } } } else if mark_reference(seen, base, $reference) { + if $key == "$schema" { + crate::observe_registry!("registry.schema_ref={}", $reference); + } else { + crate::observe_registry!("registry.external_ref={}", $reference); + } let resolved = if base.has_fragment() { let mut base_without_fragment = base.as_ref().clone(); base_without_fragment.set_fragment(None); @@ -1854,6 +2100,7 @@ fn collect_external_resources<'doc>( } if let Some(object) = contents.as_object() { + crate::observe_registry!("registry.ref_scan.object_len={}", object.len()); if object.len() < 3 { for (key, value) in object { if key == "$ref" { @@ -2027,6 +2274,14 @@ fn resolve_id( /// /// **NOTE**: A slightly faster version of pointer resolution based on `Value::pointer` from `serde_json`. 
pub fn pointer<'a>(document: &'a Value, pointer: &str) -> Option<&'a Value> { + crate::observe_registry!( + "registry.pointer_segments={}", + pointer + .as_bytes() + .iter() + .filter(|&&byte| byte == b'/') + .count() + ); if pointer.is_empty() { return Some(document); } @@ -2053,16 +2308,21 @@ pub fn parse_index(s: &str) -> Option { } #[cfg(test)] mod tests { - use std::error::Error as _; + use std::{error::Error as _, sync::Arc}; use ahash::AHashMap; use fluent_uri::Uri; use serde_json::{json, Value}; use test_case::test_case; - use crate::{resource::PathStack, uri::from_str, Anchor, Draft, Registry, Resource, Retrieve}; + use crate::{uri::from_str, Anchor, Draft, JsonPointerNode, Registry, Resource, Retrieve}; - use super::{pointer, ParsedPointer, SPECIFICATIONS}; + use super::{ + insert_root_index_entries, pointer, process_borrowed_document, process_owned_document, + IndexedResource, KnownResources, LocalSeen, ParsedPointer, ProcessingState, StoredDocument, + SPECIFICATIONS, + }; + use crate::cache::UriCache; #[test] fn test_empty_pointer() { @@ -2071,7 +2331,7 @@ mod tests { } #[test] - fn test_parsed_pointer_from_path_stack_matches_pointer_lookup() { + fn test_parsed_pointer_from_json_pointer_node_matches_pointer_lookup() { let document = json!({ "$defs": { "foo/bar": [ @@ -2079,11 +2339,12 @@ mod tests { ] } }); - let mut path = PathStack::from_base("/$defs".to_string()); - path.push_key("foo/bar"); - path.push_index(0); + let root = JsonPointerNode::new(); + let defs = root.push("$defs"); + let entry = defs.push("foo/bar"); + let node = entry.push(0); - let parsed = ParsedPointer::from_path_stack(&path).expect("Pointer should parse"); + let parsed = ParsedPointer::from_pointer_node(&node); assert_eq!( parsed.lookup(&document), pointer(&document, "/$defs/foo~1bar/0") @@ -2220,6 +2481,139 @@ mod tests { } } + #[test] + fn test_process_borrowed_document_indexes_embedded_resource_as_borrowed() { + let schema = json!({ + "$defs": { + "embedded": { + "$id": 
"http://example.com/embedded", + "type": "string" + } + } + }); + let doc_key = Arc::new(from_str("http://example.com/root").expect("valid root URI")); + let document = Arc::new(StoredDocument::borrowed(&schema, Draft::Draft202012)); + let mut state = ProcessingState::new(); + let mut known_resources = KnownResources::default(); + let mut resolution_cache = UriCache::new(); + let mut local_seen = LocalSeen::new(); + + known_resources.insert((*doc_key).clone()); + insert_root_index_entries(&mut state.index_data, &doc_key, &document); + + process_borrowed_document( + Arc::clone(&doc_key), + &doc_key, + &document, + "", + Draft::Draft202012, + &mut state, + &mut known_resources, + &mut resolution_cache, + &mut local_seen, + ) + .expect("borrowed document traversal should succeed"); + + let embedded_uri = + Arc::new(from_str("http://example.com/embedded").expect("valid embedded URI")); + match state.index_data.resources.get(&embedded_uri) { + Some(IndexedResource::Borrowed(resource)) => { + assert_eq!( + resource.contents(), + &json!({"$id": "http://example.com/embedded", "type": "string"}) + ); + } + other => panic!("expected borrowed embedded resource entry, got {other:?}"), + } + } + + #[test] + fn test_process_owned_document_indexes_embedded_resource_as_owned() { + let schema = json!({ + "$defs": { + "embedded": { + "$id": "http://example.com/embedded", + "type": "string" + } + } + }); + let doc_key = Arc::new(from_str("http://example.com/root").expect("valid root URI")); + let document = Arc::new(StoredDocument::owned(schema, Draft::Draft202012)); + let mut state = ProcessingState::new(); + let mut known_resources = KnownResources::default(); + let mut resolution_cache = UriCache::new(); + let mut local_seen = LocalSeen::new(); + + known_resources.insert((*doc_key).clone()); + insert_root_index_entries(&mut state.index_data, &doc_key, &document); + + process_owned_document( + Arc::clone(&doc_key), + &doc_key, + &document, + "", + Draft::Draft202012, + &mut state, 
+ &mut known_resources, + &mut resolution_cache, + &mut local_seen, + ) + .expect("owned document traversal should succeed"); + + let embedded_uri = + Arc::new(from_str("http://example.com/embedded").expect("valid embedded URI")); + match state.index_data.resources.get(&embedded_uri) { + Some(IndexedResource::Owned { .. }) => {} + other => panic!("expected owned embedded resource entry, got {other:?}"), + } + } + + #[test] + fn test_process_owned_document_indexes_fragment_root_with_pointer_prefix() { + let schema = json!({ + "$defs": { + "embedded": { + "$id": "http://example.com/embedded", + "type": "string" + } + } + }); + let doc_key = Arc::new(from_str("http://example.com/root").expect("valid root URI")); + let document = Arc::new(StoredDocument::owned(schema, Draft::Draft202012)); + let mut state = ProcessingState::new(); + let mut known_resources = KnownResources::default(); + let mut resolution_cache = UriCache::new(); + let mut local_seen = LocalSeen::new(); + + known_resources.insert((*doc_key).clone()); + insert_root_index_entries(&mut state.index_data, &doc_key, &document); + + process_owned_document( + Arc::clone(&doc_key), + &doc_key, + &document, + "/$defs/embedded", + Draft::Draft202012, + &mut state, + &mut known_resources, + &mut resolution_cache, + &mut local_seen, + ) + .expect("owned fragment traversal should succeed"); + + let embedded_uri = + Arc::new(from_str("http://example.com/embedded").expect("valid embedded URI")); + match state.index_data.resources.get(&embedded_uri) { + Some(IndexedResource::Owned { pointer, .. 
}) => { + assert_eq!( + pointer.lookup(document.contents()), + Some(&json!({"$id": "http://example.com/embedded", "type": "string"})) + ); + } + other => panic!("expected owned embedded resource entry, got {other:?}"), + } + } + #[test] fn test_prepare_merges_anchor_entries_for_shared_effective_uri() { let registry = Registry::new() diff --git a/crates/jsonschema-referencing/src/resource.rs b/crates/jsonschema-referencing/src/resource.rs index 2777743c..938f7afe 100644 --- a/crates/jsonschema-referencing/src/resource.rs +++ b/crates/jsonschema-referencing/src/resource.rs @@ -2,95 +2,7 @@ use std::borrow::Cow; use serde_json::Value; -use crate::{write_escaped_str, write_index, Draft, Error, Resolved, Resolver, Segments}; - -/// A segment in a JSON Pointer path, stored lazily to avoid string allocation during traversal. -pub(crate) enum PathSegment<'a> { - Key(&'a str), - Index(usize), -} - -/// A lazy JSON Pointer path that avoids string building during schema traversal. -/// The path is only materialized to a `String` when actually needed (e.g., for skeleton entries). -pub(crate) struct PathStack<'a> { - /// Owned initial path from the queue entry (empty string for root resources). - base: String, - /// Dynamically accumulated segments as the traversal descends. - segments: Vec>, -} - -impl<'a> PathStack<'a> { - #[inline] - pub(crate) fn from_base(base: String) -> Self { - Self { - base, - segments: Vec::new(), - } - } - - /// Push a key segment. Returns a checkpoint to restore with `truncate`. - #[inline] - pub(crate) fn push_key(&mut self, key: &'a str) -> usize { - let checkpoint = self.segments.len(); - self.segments.push(PathSegment::Key(key)); - checkpoint - } - - /// Push a numeric index segment. Returns a checkpoint to restore with `truncate`. 
- #[inline] - pub(crate) fn push_index(&mut self, idx: usize) -> usize { - let checkpoint = self.segments.len(); - self.segments.push(PathSegment::Index(idx)); - checkpoint - } - - /// Restore the stack to the given checkpoint (removing segments added after it). - #[inline] - pub(crate) fn truncate(&mut self, checkpoint: usize) { - self.segments.truncate(checkpoint); - } - - #[inline] - pub(crate) fn base_pointer(&self) -> &str { - &self.base - } - - #[inline] - pub(crate) fn segments(&self) -> &[PathSegment<'a>] { - &self.segments - } - - /// Materialize the full JSON Pointer path as an owned `String`. - /// Only called when a skeleton entry is actually needed. - pub(crate) fn to_pointer(&self) -> String { - if self.segments.is_empty() { - return self.base.clone(); - } - let extra = self - .segments - .iter() - .map(|segment| match segment { - PathSegment::Key(key) => 1 + key.len(), - PathSegment::Index(idx) => 1 + idx.checked_ilog10().unwrap_or(0) as usize + 1, - }) - .sum::(); - let mut s = String::with_capacity(self.base.len() + extra); - s.push_str(&self.base); - for seg in &self.segments { - match seg { - PathSegment::Key(k) => { - s.push('/'); - write_escaped_str(&mut s, k); - } - PathSegment::Index(i) => { - s.push('/'); - write_index(&mut s, *i); - } - } - } - s - } -} +use crate::{Draft, Error, Resolved, Resolver, Segments}; /// An owned document with a concrete interpretation under a JSON Schema specification. 
#[derive(Debug, Clone, PartialEq, Eq)] diff --git a/crates/jsonschema-referencing/src/specification/draft201909.rs b/crates/jsonschema-referencing/src/specification/draft201909.rs index 0e982121..9e9c4130 100644 --- a/crates/jsonschema-referencing/src/specification/draft201909.rs +++ b/crates/jsonschema-referencing/src/specification/draft201909.rs @@ -1,17 +1,79 @@ use serde_json::Value; -use crate::{resource::PathStack, specification::Draft, Error, Resolver, ResourceRef, Segments}; +use crate::{specification::Draft, Error, JsonPointerNode, Resolver, ResourceRef, Segments}; use super::subresources::{self, SubresourceIteratorInner}; -pub(crate) fn walk_subresources_with_path<'a, E, F>( +pub(crate) fn walk_borrowed_subresources<'a, E, F>( contents: &'a Value, - path: &mut PathStack<'a>, draft: Draft, f: &mut F, ) -> Result<(), E> where - F: FnMut(&mut PathStack<'a>, &'a Value, Draft) -> Result<(), E>, + F: FnMut(&'a Value, Draft) -> Result<(), E>, +{ + let Some(schema) = contents.as_object() else { + return Ok(()); + }; + for (key, value) in schema { + match key.as_str() { + "additionalItems" + | "additionalProperties" + | "contains" + | "contentSchema" + | "else" + | "if" + | "not" + | "propertyNames" + | "then" + | "unevaluatedItems" + | "unevaluatedProperties" => f(value, draft.detect(value))?, + "allOf" | "anyOf" | "oneOf" => { + if let Some(arr) = value.as_array() { + for item in arr { + f(item, draft.detect(item))?; + } + } + } + "$defs" | "definitions" | "dependentSchemas" | "patternProperties" | "properties" => { + if let Some(obj) = value.as_object() { + for child_value in obj.values() { + f(child_value, draft.detect(child_value))?; + } + } + } + "items" => match value { + Value::Array(arr) => { + for item in arr { + f(item, draft.detect(item))?; + } + } + _ => f(value, draft.detect(value))?, + }, + "dependencies" => { + if let Some(obj) = value.as_object() { + for child_value in obj.values() { + if !child_value.is_object() { + continue; + } + f(child_value, 
draft.detect(child_value))?; + } + } + } + _ => {} + } + } + Ok(()) +} + +pub(crate) fn walk_owned_subresources<'a, E, F>( + contents: &'a Value, + path: &JsonPointerNode<'_, '_>, + draft: Draft, + f: &mut F, +) -> Result<(), E> +where + F: FnMut(&JsonPointerNode<'_, '_>, &'a Value, Draft) -> Result<(), E>, { let Some(schema) = contents.as_object() else { return Ok(()); @@ -29,58 +91,49 @@ where | "then" | "unevaluatedItems" | "unevaluatedProperties" => { - let c = path.push_key(key); - f(path, value, draft.detect(value))?; - path.truncate(c); + let child_path = path.push(key.as_str()); + f(&child_path, value, draft.detect(value))?; } "allOf" | "anyOf" | "oneOf" => { if let Some(arr) = value.as_array() { - let c1 = path.push_key(key); + let parent_path = path.push(key.as_str()); for (i, item) in arr.iter().enumerate() { - let c2 = path.push_index(i); - f(path, item, draft.detect(item))?; - path.truncate(c2); + let child_path = parent_path.push(i); + f(&child_path, item, draft.detect(item))?; } - path.truncate(c1); } } "$defs" | "definitions" | "dependentSchemas" | "patternProperties" | "properties" => { if let Some(obj) = value.as_object() { - let c1 = path.push_key(key); + let parent_path = path.push(key.as_str()); for (child_key, child_value) in obj { - let c2 = path.push_key(child_key); - f(path, child_value, draft.detect(child_value))?; - path.truncate(c2); + let child_path = parent_path.push(child_key.as_str()); + f(&child_path, child_value, draft.detect(child_value))?; } - path.truncate(c1); } } "items" => { - let c1 = path.push_key("items"); + let parent_path = path.push("items"); match value { Value::Array(arr) => { for (i, item) in arr.iter().enumerate() { - let c2 = path.push_index(i); - f(path, item, draft.detect(item))?; - path.truncate(c2); + let child_path = parent_path.push(i); + f(&child_path, item, draft.detect(item))?; } } - _ => f(path, value, draft.detect(value))?, + _ => f(&parent_path, value, draft.detect(value))?, } - path.truncate(c1); } 
"dependencies" => { if let Some(obj) = value.as_object() { - let c1 = path.push_key(key); + let parent_path = path.push(key.as_str()); for (child_key, child_value) in obj { if !child_value.is_object() { continue; } - let c2 = path.push_key(child_key); - f(path, child_value, draft.detect(child_value))?; - path.truncate(c2); + let child_path = parent_path.push(child_key.as_str()); + f(&child_path, child_value, draft.detect(child_value))?; } - path.truncate(c1); } } _ => {} diff --git a/crates/jsonschema-referencing/src/specification/draft4.rs b/crates/jsonschema-referencing/src/specification/draft4.rs index 6ff5e652..93baccc5 100644 --- a/crates/jsonschema-referencing/src/specification/draft4.rs +++ b/crates/jsonschema-referencing/src/specification/draft4.rs @@ -1,17 +1,200 @@ use serde_json::Value; -use crate::{resource::PathStack, specification::Draft, Error, Resolver, ResourceRef, Segments}; +use crate::{specification::Draft, Error, JsonPointerNode, Resolver, ResourceRef, Segments}; use super::subresources::{self, SubresourceIteratorInner}; -pub(crate) fn walk_subresources_with_path<'a, E, F>( +pub(crate) struct BorrowedObjectProbe<'a> { + pub(crate) id: Option<&'a str>, + pub(crate) has_anchor: bool, + pub(crate) has_ref_or_schema: bool, +} + +pub(crate) fn probe_borrowed_object(contents: &Value) -> Option> { + let schema = contents.as_object()?; + + let raw_id = schema.get("id").and_then(Value::as_str); + let has_ref = schema.get("$ref").and_then(Value::as_str).is_some(); + let has_ref_or_schema = has_ref || schema.get("$schema").and_then(Value::as_str).is_some(); + let mut has_anchor = false; + if let Some(id) = raw_id { + has_anchor = id.starts_with('#'); + } + + let id = match raw_id { + Some(id) if !id.starts_with('#') && !has_ref => Some(id), + _ => None, + }; + + Some(BorrowedObjectProbe { + id, + has_anchor, + has_ref_or_schema, + }) +} + +pub(crate) fn scan_borrowed_object_into_scratch<'a>( + contents: &'a Value, + draft: Draft, + references: &mut 
Vec<(&'a str, &'static str)>, + children: &mut Vec<(&'a Value, Draft)>, +) -> Option<()> { + let schema = contents.as_object()?; + + for (key, value) in schema { + match key.as_str() { + "$ref" => { + if let Some(reference) = value.as_str() { + references.push((reference, "$ref")); + } + } + "$schema" => { + if let Some(reference) = value.as_str() { + references.push((reference, "$schema")); + } + } + "additionalItems" | "additionalProperties" if value.is_object() => { + crate::observe_registry!("registry.draft4.keyword={}", key); + children.push((value, draft.detect(value))); + } + "contains" + | "contentSchema" + | "else" + | "if" + | "not" + | "propertyNames" + | "then" + | "unevaluatedItems" + | "unevaluatedProperties" => { + crate::observe_registry!("registry.draft4.keyword={}", key); + children.push((value, draft.detect(value))); + } + "allOf" | "anyOf" | "oneOf" | "prefixItems" => { + crate::observe_registry!("registry.draft4.keyword={}", key); + if let Some(arr) = value.as_array() { + for item in arr { + children.push((item, draft.detect(item))); + } + } + } + "$defs" | "definitions" | "dependentSchemas" | "patternProperties" | "properties" => { + crate::observe_registry!("registry.draft4.keyword={}", key); + if let Some(obj) = value.as_object() { + for child_value in obj.values() { + children.push((child_value, draft.detect(child_value))); + } + } + } + "items" => { + crate::observe_registry!("registry.draft4.keyword=items"); + match value { + Value::Array(arr) => { + for item in arr { + children.push((item, draft.detect(item))); + } + } + _ => children.push((value, draft.detect(value))), + } + } + "dependencies" => { + crate::observe_registry!("registry.draft4.keyword=dependencies"); + if let Some(obj) = value.as_object() { + for child_value in obj.values() { + if !child_value.is_object() { + continue; + } + children.push((child_value, draft.detect(child_value))); + } + } + } + _ => {} + } + } + + Some(()) +} + +pub(crate) fn 
walk_borrowed_subresources<'a, E, F>( + contents: &'a Value, + draft: Draft, + f: &mut F, +) -> Result<(), E> +where + F: FnMut(&'a Value, Draft) -> Result<(), E>, +{ + let Some(schema) = contents.as_object() else { + return Ok(()); + }; + for (key, value) in schema { + match key.as_str() { + "additionalItems" | "additionalProperties" if value.is_object() => { + crate::observe_registry!("registry.draft4.keyword={}", key); + f(value, draft.detect(value))?; + } + "contains" + | "contentSchema" + | "else" + | "if" + | "not" + | "propertyNames" + | "then" + | "unevaluatedItems" + | "unevaluatedProperties" => { + crate::observe_registry!("registry.draft4.keyword={}", key); + f(value, draft.detect(value))?; + } + "allOf" | "anyOf" | "oneOf" | "prefixItems" => { + crate::observe_registry!("registry.draft4.keyword={}", key); + if let Some(arr) = value.as_array() { + for item in arr { + f(item, draft.detect(item))?; + } + } + } + "$defs" | "definitions" | "dependentSchemas" | "patternProperties" | "properties" => { + crate::observe_registry!("registry.draft4.keyword={}", key); + if let Some(obj) = value.as_object() { + for child_value in obj.values() { + f(child_value, draft.detect(child_value))?; + } + } + } + "items" => { + crate::observe_registry!("registry.draft4.keyword=items"); + match value { + Value::Array(arr) => { + for item in arr { + f(item, draft.detect(item))?; + } + } + _ => f(value, draft.detect(value))?, + } + } + "dependencies" => { + crate::observe_registry!("registry.draft4.keyword=dependencies"); + if let Some(obj) = value.as_object() { + for child_value in obj.values() { + if !child_value.is_object() { + continue; + } + f(child_value, draft.detect(child_value))?; + } + } + } + _ => {} + } + } + Ok(()) +} + +pub(crate) fn walk_owned_subresources<'a, E, F>( contents: &'a Value, - path: &mut PathStack<'a>, + path: &JsonPointerNode<'_, '_>, draft: Draft, f: &mut F, ) -> Result<(), E> where - F: FnMut(&mut PathStack<'a>, &'a Value, Draft) -> Result<(), E>, 
+ F: FnMut(&JsonPointerNode<'_, '_>, &'a Value, Draft) -> Result<(), E>, { let Some(schema) = contents.as_object() else { return Ok(()); @@ -19,9 +202,8 @@ where for (key, value) in schema { match key.as_str() { "additionalItems" | "additionalProperties" if value.is_object() => { - let c = path.push_key(key); - f(path, value, draft.detect(value))?; - path.truncate(c); + let child_path = path.push(key.as_str()); + f(&child_path, value, draft.detect(value))?; } "contains" | "contentSchema" @@ -32,58 +214,49 @@ where | "then" | "unevaluatedItems" | "unevaluatedProperties" => { - let c = path.push_key(key); - f(path, value, draft.detect(value))?; - path.truncate(c); + let child_path = path.push(key.as_str()); + f(&child_path, value, draft.detect(value))?; } "allOf" | "anyOf" | "oneOf" | "prefixItems" => { if let Some(arr) = value.as_array() { - let c1 = path.push_key(key); + let parent_path = path.push(key.as_str()); for (i, item) in arr.iter().enumerate() { - let c2 = path.push_index(i); - f(path, item, draft.detect(item))?; - path.truncate(c2); + let child_path = parent_path.push(i); + f(&child_path, item, draft.detect(item))?; } - path.truncate(c1); } } "$defs" | "definitions" | "dependentSchemas" | "patternProperties" | "properties" => { if let Some(obj) = value.as_object() { - let c1 = path.push_key(key); + let parent_path = path.push(key.as_str()); for (child_key, child_value) in obj { - let c2 = path.push_key(child_key); - f(path, child_value, draft.detect(child_value))?; - path.truncate(c2); + let child_path = parent_path.push(child_key.as_str()); + f(&child_path, child_value, draft.detect(child_value))?; } - path.truncate(c1); } } "items" => { - let c1 = path.push_key(key); + let parent_path = path.push(key.as_str()); match value { Value::Array(arr) => { for (i, item) in arr.iter().enumerate() { - let c2 = path.push_index(i); - f(path, item, draft.detect(item))?; - path.truncate(c2); + let child_path = parent_path.push(i); + f(&child_path, item, 
draft.detect(item))?; } } - _ => f(path, value, draft.detect(value))?, + _ => f(&parent_path, value, draft.detect(value))?, } - path.truncate(c1); } "dependencies" => { if let Some(obj) = value.as_object() { - let c1 = path.push_key(key); + let parent_path = path.push(key.as_str()); for (child_key, child_value) in obj { if !child_value.is_object() { continue; } - let c2 = path.push_key(child_key); - f(path, child_value, draft.detect(child_value))?; - path.truncate(c2); + let child_path = parent_path.push(child_key.as_str()); + f(&child_path, child_value, draft.detect(child_value))?; } - path.truncate(c1); } } _ => {} @@ -166,3 +339,61 @@ pub(crate) fn maybe_in_subresource<'r>( IN_CHILD, ) } + +#[cfg(test)] +mod tests { + use super::{probe_borrowed_object, scan_borrowed_object_into_scratch}; + use crate::Draft; + use serde_json::json; + + #[test] + fn test_probe_borrowed_object_collects_control_keys() { + let schema = json!({ + "id": "http://example.com/node", + "$schema": "http://example.com/meta", + "properties": { + "name": {"type": "string"} + }, + "items": {"type": "integer"} + }); + let analysis = probe_borrowed_object(&schema).expect("schema object should be analyzed"); + + assert_eq!(analysis.id, Some("http://example.com/node")); + assert!(!analysis.has_anchor); + assert!(analysis.has_ref_or_schema); + } + + #[test] + fn test_scan_borrowed_object_into_scratch_collects_refs_and_children() { + let schema = json!({ + "id": "http://example.com/node", + "$schema": "http://example.com/meta", + "properties": { + "name": {"type": "string"} + }, + "items": {"type": "integer"} + }); + let mut references = Vec::new(); + let mut children = Vec::new(); + + scan_borrowed_object_into_scratch(&schema, Draft::Draft4, &mut references, &mut children) + .expect("schema object should be scanned"); + + assert_eq!( + references + .iter() + .map(|(reference, key): &(&str, &'static str)| { + (key.to_string(), reference.to_string()) + }) + .collect::>(), + 
vec![("$schema".to_string(), "http://example.com/meta".to_string())] + ); + let children: Vec<_> = children + .iter() + .map(|(child, child_draft)| ((*child).clone(), *child_draft)) + .collect(); + assert_eq!(children.len(), 2); + assert!(children.contains(&(json!({"type": "string"}), Draft::Draft4))); + assert!(children.contains(&(json!({"type": "integer"}), Draft::Draft4))); + } +} diff --git a/crates/jsonschema-referencing/src/specification/draft6.rs b/crates/jsonschema-referencing/src/specification/draft6.rs index 82ed7f7b..cf077ef9 100644 --- a/crates/jsonschema-referencing/src/specification/draft6.rs +++ b/crates/jsonschema-referencing/src/specification/draft6.rs @@ -1,17 +1,16 @@ use serde_json::Value; -use crate::{resource::PathStack, specification::Draft, Error, Resolver, ResourceRef, Segments}; +use crate::{specification::Draft, Error, JsonPointerNode, Resolver, ResourceRef, Segments}; use super::subresources::{self, SubresourceIteratorInner}; -pub(crate) fn walk_subresources_with_path<'a, E, F>( +pub(crate) fn walk_borrowed_subresources<'a, E, F>( contents: &'a Value, - path: &mut PathStack<'a>, draft: Draft, f: &mut F, ) -> Result<(), E> where - F: FnMut(&mut PathStack<'a>, &'a Value, Draft) -> Result<(), E>, + F: FnMut(&'a Value, Draft) -> Result<(), E>, { let Some(schema) = contents.as_object() else { return Ok(()); @@ -19,58 +18,104 @@ where for (key, value) in schema { match key.as_str() { "additionalItems" | "additionalProperties" | "contains" | "not" | "propertyNames" => { - let c = path.push_key(key); - f(path, value, draft.detect(value))?; - path.truncate(c); + f(value, draft.detect(value))?; } "allOf" | "anyOf" | "oneOf" => { if let Some(arr) = value.as_array() { - let c1 = path.push_key(key); + for item in arr { + f(item, draft.detect(item))?; + } + } + } + "definitions" | "patternProperties" | "properties" => { + if let Some(obj) = value.as_object() { + for child_value in obj.values() { + f(child_value, draft.detect(child_value))?; + } + } + 
} + "items" => match value { + Value::Array(arr) => { + for item in arr { + f(item, draft.detect(item))?; + } + } + _ => f(value, draft.detect(value))?, + }, + "dependencies" => { + if let Some(obj) = value.as_object() { + for child_value in obj.values() { + if !child_value.is_object() { + continue; + } + f(child_value, draft.detect(child_value))?; + } + } + } + _ => {} + } + } + Ok(()) +} + +pub(crate) fn walk_owned_subresources<'a, E, F>( + contents: &'a Value, + path: &JsonPointerNode<'_, '_>, + draft: Draft, + f: &mut F, +) -> Result<(), E> +where + F: FnMut(&JsonPointerNode<'_, '_>, &'a Value, Draft) -> Result<(), E>, +{ + let Some(schema) = contents.as_object() else { + return Ok(()); + }; + for (key, value) in schema { + match key.as_str() { + "additionalItems" | "additionalProperties" | "contains" | "not" | "propertyNames" => { + let child_path = path.push(key.as_str()); + f(&child_path, value, draft.detect(value))?; + } + "allOf" | "anyOf" | "oneOf" => { + if let Some(arr) = value.as_array() { + let parent_path = path.push(key.as_str()); for (i, item) in arr.iter().enumerate() { - let c2 = path.push_index(i); - f(path, item, draft.detect(item))?; - path.truncate(c2); + let child_path = parent_path.push(i); + f(&child_path, item, draft.detect(item))?; } - path.truncate(c1); } } "definitions" | "patternProperties" | "properties" => { if let Some(obj) = value.as_object() { - let c1 = path.push_key(key); + let parent_path = path.push(key.as_str()); for (child_key, child_value) in obj { - let c2 = path.push_key(child_key); - f(path, child_value, draft.detect(child_value))?; - path.truncate(c2); + let child_path = parent_path.push(child_key.as_str()); + f(&child_path, child_value, draft.detect(child_value))?; } - path.truncate(c1); } } "items" => { - let c1 = path.push_key("items"); + let parent_path = path.push("items"); match value { Value::Array(arr) => { for (i, item) in arr.iter().enumerate() { - let c2 = path.push_index(i); - f(path, item, 
draft.detect(item))?; - path.truncate(c2); + let child_path = parent_path.push(i); + f(&child_path, item, draft.detect(item))?; } } - _ => f(path, value, draft.detect(value))?, + _ => f(&parent_path, value, draft.detect(value))?, } - path.truncate(c1); } "dependencies" => { if let Some(obj) = value.as_object() { - let c1 = path.push_key(key); + let parent_path = path.push(key.as_str()); for (child_key, child_value) in obj { if !child_value.is_object() { continue; } - let c2 = path.push_key(child_key); - f(path, child_value, draft.detect(child_value))?; - path.truncate(c2); + let child_path = parent_path.push(child_key.as_str()); + f(&child_path, child_value, draft.detect(child_value))?; } - path.truncate(c1); } } _ => {} diff --git a/crates/jsonschema-referencing/src/specification/draft7.rs b/crates/jsonschema-referencing/src/specification/draft7.rs index ca4cde92..dab00b9d 100644 --- a/crates/jsonschema-referencing/src/specification/draft7.rs +++ b/crates/jsonschema-referencing/src/specification/draft7.rs @@ -1,17 +1,76 @@ use serde_json::Value; -use crate::{resource::PathStack, specification::Draft, Error, Resolver, ResourceRef, Segments}; +use crate::{specification::Draft, Error, JsonPointerNode, Resolver, ResourceRef, Segments}; use super::subresources::{self, SubresourceIteratorInner}; -pub(crate) fn walk_subresources_with_path<'a, E, F>( +pub(crate) fn walk_borrowed_subresources<'a, E, F>( contents: &'a Value, - path: &mut PathStack<'a>, draft: Draft, f: &mut F, ) -> Result<(), E> where - F: FnMut(&mut PathStack<'a>, &'a Value, Draft) -> Result<(), E>, + F: FnMut(&'a Value, Draft) -> Result<(), E>, +{ + let Some(schema) = contents.as_object() else { + return Ok(()); + }; + for (key, value) in schema { + match key.as_str() { + "additionalItems" + | "additionalProperties" + | "contains" + | "else" + | "if" + | "not" + | "propertyNames" + | "then" => f(value, draft.detect(value))?, + "allOf" | "anyOf" | "oneOf" => { + if let Some(arr) = value.as_array() { + for 
item in arr { + f(item, draft.detect(item))?; + } + } + } + "definitions" | "patternProperties" | "properties" => { + if let Some(obj) = value.as_object() { + for child_value in obj.values() { + f(child_value, draft.detect(child_value))?; + } + } + } + "items" => match value { + Value::Array(arr) => { + for item in arr { + f(item, draft.detect(item))?; + } + } + _ => f(value, draft.detect(value))?, + }, + "dependencies" => { + if let Some(obj) = value.as_object() { + for child_value in obj.values() { + if !child_value.is_object() { + continue; + } + f(child_value, draft.detect(child_value))?; + } + } + } + _ => {} + } + } + Ok(()) +} + +pub(crate) fn walk_owned_subresources<'a, E, F>( + contents: &'a Value, + path: &JsonPointerNode<'_, '_>, + draft: Draft, + f: &mut F, +) -> Result<(), E> +where + F: FnMut(&JsonPointerNode<'_, '_>, &'a Value, Draft) -> Result<(), E>, { let Some(schema) = contents.as_object() else { return Ok(()); @@ -26,58 +85,49 @@ where | "not" | "propertyNames" | "then" => { - let c = path.push_key(key); - f(path, value, draft.detect(value))?; - path.truncate(c); + let child_path = path.push(key.as_str()); + f(&child_path, value, draft.detect(value))?; } "allOf" | "anyOf" | "oneOf" => { if let Some(arr) = value.as_array() { - let c1 = path.push_key(key); + let parent_path = path.push(key.as_str()); for (i, item) in arr.iter().enumerate() { - let c2 = path.push_index(i); - f(path, item, draft.detect(item))?; - path.truncate(c2); + let child_path = parent_path.push(i); + f(&child_path, item, draft.detect(item))?; } - path.truncate(c1); } } "definitions" | "patternProperties" | "properties" => { if let Some(obj) = value.as_object() { - let c1 = path.push_key(key); + let parent_path = path.push(key.as_str()); for (child_key, child_value) in obj { - let c2 = path.push_key(child_key); - f(path, child_value, draft.detect(child_value))?; - path.truncate(c2); + let child_path = parent_path.push(child_key.as_str()); + f(&child_path, child_value, 
draft.detect(child_value))?; } - path.truncate(c1); } } "items" => { - let c1 = path.push_key("items"); + let parent_path = path.push("items"); match value { Value::Array(arr) => { for (i, item) in arr.iter().enumerate() { - let c2 = path.push_index(i); - f(path, item, draft.detect(item))?; - path.truncate(c2); + let child_path = parent_path.push(i); + f(&child_path, item, draft.detect(item))?; } } - _ => f(path, value, draft.detect(value))?, + _ => f(&parent_path, value, draft.detect(value))?, } - path.truncate(c1); } "dependencies" => { if let Some(obj) = value.as_object() { - let c1 = path.push_key(key); + let parent_path = path.push(key.as_str()); for (child_key, child_value) in obj { if !child_value.is_object() { continue; } - let c2 = path.push_key(child_key); - f(path, child_value, draft.detect(child_value))?; - path.truncate(c2); + let child_path = parent_path.push(child_key.as_str()); + f(&child_path, child_value, draft.detect(child_value))?; } - path.truncate(c1); } } _ => {} diff --git a/crates/jsonschema-referencing/src/specification/mod.rs b/crates/jsonschema-referencing/src/specification/mod.rs index d04b35ce..7c8233f1 100644 --- a/crates/jsonschema-referencing/src/specification/mod.rs +++ b/crates/jsonschema-referencing/src/specification/mod.rs @@ -11,7 +11,7 @@ mod subresources; use crate::{ anchors, vocabularies::{VocabularySet, DRAFT_2019_09_VOCABULARIES, DRAFT_2020_12_VOCABULARIES}, - Anchor, Error, Resolver, Resource, ResourceRef, Segments, + Anchor, Error, JsonPointerNode, Resolver, Resource, ResourceRef, Segments, }; /// JSON Schema specification versions. 
@@ -162,6 +162,65 @@ impl Draft { None => SubresourceIterator::Empty, } } + pub(crate) fn walk_borrowed_subresources<'a, E, F>( + self, + contents: &'a Value, + f: &mut F, + ) -> Result<(), E> + where + F: FnMut(&'a Value, Draft) -> Result<(), E>, + { + match self { + Draft::Draft4 => draft4::walk_borrowed_subresources(contents, self, f), + Draft::Draft6 => draft6::walk_borrowed_subresources(contents, self, f), + Draft::Draft7 => draft7::walk_borrowed_subresources(contents, self, f), + Draft::Draft201909 => draft201909::walk_borrowed_subresources(contents, self, f), + Draft::Draft202012 | Draft::Unknown => { + subresources::walk_borrowed_subresources(contents, self, f) + } + } + } + pub(crate) fn probe_draft4_borrowed_object( + self, + contents: &Value, + ) -> Option> { + match self { + Draft::Draft4 => draft4::probe_borrowed_object(contents), + _ => None, + } + } + pub(crate) fn scan_draft4_borrowed_object_into_scratch<'a>( + self, + contents: &'a Value, + references: &mut Vec<(&'a str, &'static str)>, + children: &mut Vec<(&'a Value, Draft)>, + ) -> Option<()> { + match self { + Draft::Draft4 => { + draft4::scan_borrowed_object_into_scratch(contents, self, references, children) + } + _ => None, + } + } + pub(crate) fn walk_owned_subresources<'a, E, F>( + self, + contents: &'a Value, + path: &JsonPointerNode<'_, '_>, + f: &mut F, + ) -> Result<(), E> + where + F: FnMut(&JsonPointerNode<'_, '_>, &'a Value, Draft) -> Result<(), E>, + { + match self { + Draft::Draft4 => draft4::walk_owned_subresources(contents, path, self, f), + Draft::Draft6 => draft6::walk_owned_subresources(contents, path, self, f), + Draft::Draft7 => draft7::walk_owned_subresources(contents, path, self, f), + Draft::Draft201909 => draft201909::walk_owned_subresources(contents, path, self, f), + Draft::Draft202012 | Draft::Unknown => { + subresources::walk_owned_subresources(contents, path, self, f) + } + } + } pub(crate) fn anchors(self, contents: &Value) -> impl Iterator> { match self { 
Draft::Draft4 => anchors::legacy_anchor_in_id(self, contents), @@ -188,26 +247,6 @@ impl Draft { } } } - pub(crate) fn walk_subresources_with_path<'a, E, F>( - self, - contents: &'a Value, - path: &mut crate::resource::PathStack<'a>, - f: &mut F, - ) -> Result<(), E> - where - F: FnMut(&mut crate::resource::PathStack<'a>, &'a Value, Draft) -> Result<(), E>, - { - match self { - Draft::Draft4 => draft4::walk_subresources_with_path(contents, path, self, f), - Draft::Draft6 => draft6::walk_subresources_with_path(contents, path, self, f), - Draft::Draft7 => draft7::walk_subresources_with_path(contents, path, self, f), - Draft::Draft201909 => draft201909::walk_subresources_with_path(contents, path, self, f), - Draft::Draft202012 | Draft::Unknown => { - subresources::walk_subresources_with_path(contents, path, self, f) - } - } - } - /// Identifies known JSON schema keywords per draft. #[must_use] pub fn is_known_keyword(&self, keyword: &str) -> bool { diff --git a/crates/jsonschema-referencing/src/specification/subresources.rs b/crates/jsonschema-referencing/src/specification/subresources.rs index 7ee689b3..eb9d72c8 100644 --- a/crates/jsonschema-referencing/src/specification/subresources.rs +++ b/crates/jsonschema-referencing/src/specification/subresources.rs @@ -4,22 +4,18 @@ use std::iter::FlatMap; use serde_json::Value; use crate::{ - resource::PathStack, segments::Segment, specification::Draft, Error, Resolver, ResourceRef, + segments::Segment, specification::Draft, Error, JsonPointerNode, Resolver, ResourceRef, Segments, }; -/// Walk the direct subresources of `contents` (Draft 2020-12 / Unknown), -/// calling `f(path, &Value, Draft)` for each one. -/// `path` is the lazy JSON pointer to the current node; segments are pushed before each -/// call to `f` and popped afterward. 
-pub(crate) fn walk_subresources_with_path<'a, E, F>( +pub(crate) fn walk_owned_subresources<'a, E, F>( contents: &'a Value, - path: &mut PathStack<'a>, + path: &JsonPointerNode<'_, '_>, draft: Draft, f: &mut F, ) -> Result<(), E> where - F: FnMut(&mut PathStack<'a>, &'a Value, Draft) -> Result<(), E>, + F: FnMut(&JsonPointerNode<'_, '_>, &'a Value, Draft) -> Result<(), E>, { let Some(schema) = contents.as_object() else { return Ok(()); @@ -37,30 +33,69 @@ where | "then" | "unevaluatedItems" | "unevaluatedProperties" => { - let c = path.push_key(key); - f(path, value, draft.detect(value))?; - path.truncate(c); + let child_path = path.push(key.as_str()); + f(&child_path, value, draft.detect(value))?; } "allOf" | "anyOf" | "oneOf" | "prefixItems" => { if let Some(arr) = value.as_array() { - let c1 = path.push_key(key); + let parent_path = path.push(key.as_str()); for (i, item) in arr.iter().enumerate() { - let c2 = path.push_index(i); - f(path, item, draft.detect(item))?; - path.truncate(c2); + let child_path = parent_path.push(i); + f(&child_path, item, draft.detect(item))?; } - path.truncate(c1); } } "$defs" | "definitions" | "dependentSchemas" | "patternProperties" | "properties" => { if let Some(obj) = value.as_object() { - let c1 = path.push_key(key); + let parent_path = path.push(key.as_str()); for (child_key, child_value) in obj { - let c2 = path.push_key(child_key); - f(path, child_value, draft.detect(child_value))?; - path.truncate(c2); + let child_path = parent_path.push(child_key.as_str()); + f(&child_path, child_value, draft.detect(child_value))?; + } + } + } + _ => {} + } + } + Ok(()) +} + +pub(crate) fn walk_borrowed_subresources<'a, E, F>( + contents: &'a Value, + draft: Draft, + f: &mut F, +) -> Result<(), E> +where + F: FnMut(&'a Value, Draft) -> Result<(), E>, +{ + let Some(schema) = contents.as_object() else { + return Ok(()); + }; + for (key, value) in schema { + match key.as_str() { + "additionalProperties" + | "contains" + | "contentSchema" + | 
"else" + | "if" + | "items" + | "not" + | "propertyNames" + | "then" + | "unevaluatedItems" + | "unevaluatedProperties" => f(value, draft.detect(value))?, + "allOf" | "anyOf" | "oneOf" | "prefixItems" => { + if let Some(arr) = value.as_array() { + for item in arr { + f(item, draft.detect(item))?; + } + } + } + "$defs" | "definitions" | "dependentSchemas" | "patternProperties" | "properties" => { + if let Some(obj) = value.as_object() { + for child_value in obj.values() { + f(child_value, draft.detect(child_value))?; } - path.truncate(c1); } } _ => {} @@ -404,4 +439,58 @@ mod tests { "Draft {draft:?} should return empty subresources for boolean schema", ); } + + #[test] + fn test_walk_borrowed_subresources_matches_iterator_order() { + let schema = json!({ + "properties": { + "name": {"type": "string"} + }, + "allOf": [ + {"minimum": 1} + ] + }); + let expected: Vec<_> = Draft::Draft202012 + .subresources_of(&schema) + .map(|subschema| (subschema.clone(), Draft::Draft202012.detect(subschema))) + .collect(); + let mut seen = Vec::new(); + + Draft::Draft202012 + .walk_borrowed_subresources(&schema, &mut |subschema, draft| { + seen.push((subschema.clone(), draft)); + Ok::<(), ()>(()) + }) + .unwrap(); + + assert_eq!(seen, expected); + } + + #[test] + fn test_walk_owned_subresources_reports_pointer_path() { + let schema = json!({ + "properties": { + "name": {"type": "string"} + } + }); + let root = crate::JsonPointerNode::new(); + let mut seen = Vec::new(); + + Draft::Draft202012 + .walk_owned_subresources(&schema, &root, &mut |path, subschema, draft| { + let pointer = crate::OwnedJsonPointer::from(path); + seen.push((pointer.as_str().to_string(), subschema.clone(), draft)); + Ok::<(), ()>(()) + }) + .unwrap(); + + assert_eq!( + seen, + vec![( + "/properties/name".to_string(), + json!({"type": "string"}), + Draft::Draft202012, + )] + ); + } } diff --git a/crates/jsonschema/src/paths.rs b/crates/jsonschema/src/paths.rs index b2c0234a..edbb669a 100644 --- 
a/crates/jsonschema/src/paths.rs +++ b/crates/jsonschema/src/paths.rs @@ -5,7 +5,9 @@ use std::{ sync::{Arc, OnceLock}, }; -use referencing::{unescape_segment, write_escaped_str, write_index}; +use referencing::{ + unescape_segment, write_escaped_str, write_index, JsonPointerNode, JsonPointerSegment, +}; use crate::keywords::Keyword; @@ -31,39 +33,7 @@ impl fmt::Display for LocationSegment<'_> { /// /// [`LazyLocation`] builds a path incrementally during JSON Schema validation without allocating /// memory until required by storing each segment on the stack. -#[derive(Debug, Clone, Eq, PartialEq)] -pub struct LazyLocation<'a, 'b> { - pub(crate) segment: LocationSegment<'a>, - pub(crate) parent: Option<&'b LazyLocation<'b, 'a>>, -} - -impl Default for LazyLocation<'_, '_> { - fn default() -> Self { - LazyLocation::new() - } -} - -impl<'a> LazyLocation<'a, '_> { - /// Create a root node of a JSON pointer. - #[must_use] - pub const fn new() -> Self { - LazyLocation { - // The value does not matter, it will never be used - segment: LocationSegment::Index(0), - parent: None, - } - } - - /// Push a new segment to the JSON pointer. - #[inline] - #[must_use] - pub fn push(&'a self, segment: impl Into>) -> Self { - LazyLocation { - segment: segment.into(), - parent: Some(self), - } - } -} +pub type LazyLocation<'a, 'b> = JsonPointerNode<'a, 'b>; /// Cached empty location - very common for root-level errors. 
static EMPTY_LOCATION: OnceLock = OnceLock::new(); @@ -91,15 +61,15 @@ impl<'a> From<&'a LazyLocation<'_, '_>> for Location { const STACK_CAPACITY: usize = 16; // Fast path: empty location - if value.parent.is_none() { + if value.parent().is_none() { return Location::new(); } // Fast path: single index segment (very common for array validation) // Use cached locations for indices 0-15 to avoid allocation - if let Some(parent) = value.parent { - if parent.parent.is_none() { - if let LocationSegment::Index(idx) = &value.segment { + if let Some(parent) = value.parent() { + if parent.parent().is_none() { + if let JsonPointerSegment::Index(idx) = value.segment() { if *idx < 16 { return get_cached_index_paths()[*idx].clone(); } @@ -121,11 +91,11 @@ impl<'a> From<&'a LazyLocation<'_, '_>> for Location { let mut string_capacity = 0; let mut head = value; - while let Some(next) = head.parent { + while let Some(next) = head.parent() { capacity += 1; - string_capacity += match &head.segment { - LocationSegment::Property(property) => property.len() + 1, - LocationSegment::Index(idx) => idx.checked_ilog10().unwrap_or(0) as usize + 2, + string_capacity += match head.segment() { + JsonPointerSegment::Key(property) => property.len() + 1, + JsonPointerSegment::Index(idx) => idx.checked_ilog10().unwrap_or(0) as usize + 2, }; head = next; } @@ -134,20 +104,20 @@ impl<'a> From<&'a LazyLocation<'_, '_>> for Location { if capacity <= STACK_CAPACITY { // Stack-allocated storage with references - no cloning needed - let mut stack_segments: [Option<&LocationSegment<'_>>; STACK_CAPACITY] = + let mut stack_segments: [Option<&JsonPointerSegment<'_>>; STACK_CAPACITY] = [None; STACK_CAPACITY]; let mut idx = 0; head = value; - if head.parent.is_some() { - stack_segments[idx] = Some(&head.segment); + if head.parent().is_some() { + stack_segments[idx] = Some(head.segment()); idx += 1; } - while let Some(next) = head.parent { + while let Some(next) = head.parent() { head = next; - if 
head.parent.is_some() { - stack_segments[idx] = Some(&head.segment); + if head.parent().is_some() { + stack_segments[idx] = Some(head.segment()); idx += 1; } } @@ -156,37 +126,37 @@ impl<'a> From<&'a LazyLocation<'_, '_>> for Location { for segment in stack_segments[..idx].iter().rev().flatten() { buffer.push('/'); match segment { - LocationSegment::Property(property) => { + JsonPointerSegment::Key(property) => { write_escaped_str(&mut buffer, property); } - LocationSegment::Index(idx) => { + JsonPointerSegment::Index(idx) => { write_index(&mut buffer, *idx); } } } } else { // Heap-allocated fallback for deep paths (>16 segments) - let mut segments: Vec<&LocationSegment<'_>> = Vec::with_capacity(capacity); + let mut segments: Vec<&JsonPointerSegment<'_>> = Vec::with_capacity(capacity); head = value; - if head.parent.is_some() { - segments.push(&head.segment); + if head.parent().is_some() { + segments.push(head.segment()); } - while let Some(next) = head.parent { + while let Some(next) = head.parent() { head = next; - if head.parent.is_some() { - segments.push(&head.segment); + if head.parent().is_some() { + segments.push(head.segment()); } } for segment in segments.iter().rev() { buffer.push('/'); match segment { - LocationSegment::Property(property) => { + JsonPointerSegment::Key(property) => { write_escaped_str(&mut buffer, property); } - LocationSegment::Index(idx) => { + JsonPointerSegment::Index(idx) => { write_index(&mut buffer, *idx); } } @@ -571,6 +541,15 @@ impl From for LocationSegment<'_> { } } +impl<'a> From> for JsonPointerSegment<'a> { + fn from(value: LocationSegment<'a>) -> Self { + match value { + LocationSegment::Property(property) => JsonPointerSegment::Key(property), + LocationSegment::Index(idx) => JsonPointerSegment::Index(idx), + } + } +} + /// A cheap to clone JSON pointer that represents location with a JSON value. 
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] pub struct Location(Arc); @@ -752,6 +731,17 @@ mod tests { assert_eq!(location.as_str(), expected); } + #[test] + fn test_lazy_location_converts_to_referencing_owned_json_pointer() { + let root = LazyLocation::new(); + let nested = root.push("foo/bar~baz"); + let loc = nested.push(2usize); + + let pointer = referencing::OwnedJsonPointer::from(&loc); + + assert_eq!(pointer.as_str(), "/foo~1bar~0baz/2"); + } + #[test] fn test_location_join_multiple() { let loc = Location::new(); diff --git a/profiler/Cargo.toml b/profiler/Cargo.toml index 5d465b2b..7d207e67 100644 --- a/profiler/Cargo.toml +++ b/profiler/Cargo.toml @@ -16,6 +16,7 @@ serde_json = "1" [features] dhat-heap = [] +perf-observe-registry = ["referencing/perf-observe-registry"] [profile.release] debug = true diff --git a/profiler/Justfile b/profiler/Justfile index ca224e6e..22f26593 100644 --- a/profiler/Justfile +++ b/profiler/Justfile @@ -10,6 +10,10 @@ flame preset method iterations="10000": @echo "Opening {{preset}}-{{method}}.svg in browser..." 
@xdg-open {{preset}}-{{method}}.svg 2>/dev/null || open {{preset}}-{{method}}.svg 2>/dev/null || echo "Please open {{preset}}-{{method}}.svg manually" +observe-registry preset iterations="1": + #!/bin/zsh + cargo run --release --features perf-observe-registry --package jsonschema-profiler -- --preset {{preset}} --method registry --iterations {{iterations}} | counts + # Profile with dhat using a preset dhat preset method iterations="10000": cargo run --release --features dhat-heap --package jsonschema-profiler \ From b5741d23a2a9a3fa83f1dbbf6a48527b2f6bb803 Mon Sep 17 00:00:00 2001 From: Dmitry Dygalo Date: Tue, 17 Mar 2026 23:21:37 +0100 Subject: [PATCH 04/14] wip Signed-off-by: Dmitry Dygalo --- crates/jsonschema-referencing/src/registry.rs | 255 ++++++++---------- .../src/specification/draft201909.rs | 92 ++++++- .../src/specification/draft4.rs | 64 +++-- .../src/specification/draft6.rs | 86 +++++- .../src/specification/draft7.rs | 93 ++++++- .../src/specification/mod.rs | 39 ++- .../src/specification/subresources.rs | 78 +++++- 7 files changed, 523 insertions(+), 184 deletions(-) diff --git a/crates/jsonschema-referencing/src/registry.rs b/crates/jsonschema-referencing/src/registry.rs index 373aca9a..c640d719 100644 --- a/crates/jsonschema-referencing/src/registry.rs +++ b/crates/jsonschema-referencing/src/registry.rs @@ -1068,8 +1068,8 @@ struct ProcessingState<'a> { /// pre-stored value address; non-subresource paths (e.g. `#/components/schemas/Foo`) /// are still fully traversed. 
deferred_refs: Vec, - draft4_reference_scratch: Vec<(&'a str, &'static str)>, - draft4_child_scratch: Vec<(&'a Value, Draft)>, + borrowed_reference_scratch: crate::specification::BorrowedReferenceSlots<'a>, + borrowed_child_scratch: Vec<(&'a Value, Draft)>, index_data: PreparedIndex<'a>, } @@ -1084,8 +1084,8 @@ impl ProcessingState<'_> { custom_metaschemas: Vec::new(), visited_schemas: AHashSet::new(), deferred_refs: Vec::new(), - draft4_reference_scratch: Vec::new(), - draft4_child_scratch: Vec::new(), + borrowed_reference_scratch: crate::specification::BorrowedReferenceSlots::default(), + borrowed_child_scratch: Vec::new(), index_data: PreparedIndex::default(), } } @@ -1246,10 +1246,8 @@ fn explore_borrowed_subtree<'r>( if let Some(object) = subschema.as_object() { crate::observe_registry!("registry.borrowed.object_len={}", object.len()); } - if draft == Draft::Draft4 { - let Some(probe) = draft.probe_draft4_borrowed_object(subschema) else { - return Ok(()); - }; + let probe = draft.probe_borrowed_object(subschema); + if let Some(probe) = probe.as_ref() { #[cfg(feature = "perf-observe-registry")] { let id_scan = match (probe.id.is_some(), probe.has_anchor) { @@ -1283,120 +1281,106 @@ fn explore_borrowed_subtree<'r>( subschema, ); } + } + + if probe.as_ref().is_some_and(|probe| probe.has_ref_or_schema) { + let child_start = state.borrowed_child_scratch.len(); + draft + .scan_borrowed_object_into_scratch( + subschema, + &mut state.borrowed_reference_scratch, + &mut state.borrowed_child_scratch, + ) + .expect("scratch scan should succeed when probe marked the object interesting"); + let child_end = state.borrowed_child_scratch.len(); let subschema_ptr = std::ptr::from_ref::(subschema) as usize; - if state.visited_schemas.insert(subschema_ptr) - && probe.has_ref_or_schema { - let ref_start = state.draft4_reference_scratch.len(); - let child_start = state.draft4_child_scratch.len(); - draft - .scan_draft4_borrowed_object_into_scratch( - subschema, - &mut 
state.draft4_reference_scratch, - &mut state.draft4_child_scratch, - ) - .expect("draft4 scan should always see an object here"); - let ref_end = state.draft4_reference_scratch.len(); - let child_end = state.draft4_child_scratch.len(); - - for idx in ref_start..ref_end { - let (reference, key) = state.draft4_reference_scratch[idx]; - if reference.starts_with("https://json-schema.org/draft/") - || reference.starts_with("http://json-schema.org/draft-") - || current_base_uri - .as_str() - .starts_with("https://json-schema.org/draft/") - { - if key == "$ref" { - state.refers_metaschemas = true; - } - continue; - } - if reference == "#" { - continue; + if state.visited_schemas.insert(subschema_ptr) { + for (reference, key) in [ + (state.borrowed_reference_scratch.ref_, "$ref"), + (state.borrowed_reference_scratch.schema, "$schema"), + ] { + let Some(reference) = reference else { + continue; + }; + if reference.starts_with("https://json-schema.org/draft/") + || reference.starts_with("http://json-schema.org/draft-") + || current_base_uri + .as_str() + .starts_with("https://json-schema.org/draft/") + { + if key == "$ref" { + state.refers_metaschemas = true; } - if reference.starts_with('#') { - if mark_local_reference(local_seen, ¤t_base_uri, reference) { - let ptr = reference.trim_start_matches('#'); - if let Some(referenced) = pointer(document_root, ptr) { - let target_draft = draft.detect(referenced); - let value_addr = std::ptr::from_ref::(referenced) as usize; - state.deferred_refs.push(( - Arc::clone(¤t_base_uri), - Arc::clone(document_root_uri), - ptr.to_string(), - target_draft, - value_addr, - )); - } + continue; + } + if reference == "#" { + continue; + } + if reference.starts_with('#') { + if mark_local_reference(local_seen, ¤t_base_uri, reference) { + let ptr = reference.trim_start_matches('#'); + if let Some(referenced) = pointer(document_root, ptr) { + let target_draft = draft.detect(referenced); + let value_addr = std::ptr::from_ref::(referenced) as usize; + 
state.deferred_refs.push(( + Arc::clone(¤t_base_uri), + Arc::clone(document_root_uri), + ptr.to_string(), + target_draft, + value_addr, + )); } - continue; } - if mark_reference(&mut state.seen, ¤t_base_uri, reference) { - let resolved = if current_base_uri.has_fragment() { - let mut base_without_fragment = current_base_uri.as_ref().clone(); - base_without_fragment.set_fragment(None); - - let (path, fragment) = match reference.split_once('#') { - Some((path, fragment)) => (path, Some(fragment)), - None => (reference, None), - }; - - let mut resolved = (*resolution_cache - .resolve_against(&base_without_fragment.borrow(), path)?) - .clone(); - if let Some(fragment) = fragment { - if let Some(encoded) = uri::EncodedString::new(fragment) { - resolved = resolved.with_fragment(Some(encoded)); - } else { - uri::encode_to(fragment, &mut state.scratch); - resolved = resolved.with_fragment(Some( - uri::EncodedString::new_or_panic(&state.scratch), - )); - state.scratch.clear(); - } - } - resolved - } else { - (*resolution_cache - .resolve_against(¤t_base_uri.borrow(), reference)?) 
- .clone() - }; + continue; + } + if mark_reference(&mut state.seen, ¤t_base_uri, reference) { + let resolved = if current_base_uri.has_fragment() { + let mut base_without_fragment = current_base_uri.as_ref().clone(); + base_without_fragment.set_fragment(None); - let kind = if key == "$schema" { - ReferenceKind::Schema - } else { - ReferenceKind::Ref + let (path, fragment) = match reference.split_once('#') { + Some((path, fragment)) => (path, Some(fragment)), + None => (reference, None), }; - state - .external - .insert((reference.to_string(), resolved, kind)); - } - } - let mut idx = child_start; - while idx < child_end { - let (child, child_draft) = state.draft4_child_scratch[idx]; - idx += 1; - explore_borrowed_subtree( - Arc::clone(¤t_base_uri), - document_root, - child, - child_draft, - false, - document_root_uri, - state, - known_resources, - resolution_cache, - local_seen, - )?; - } + let mut resolved = (*resolution_cache + .resolve_against(&base_without_fragment.borrow(), path)?) + .clone(); + if let Some(fragment) = fragment { + if let Some(encoded) = uri::EncodedString::new(fragment) { + resolved = resolved.with_fragment(Some(encoded)); + } else { + uri::encode_to(fragment, &mut state.scratch); + resolved = resolved.with_fragment(Some( + uri::EncodedString::new_or_panic(&state.scratch), + )); + state.scratch.clear(); + } + } + resolved + } else { + (*resolution_cache + .resolve_against(¤t_base_uri.borrow(), reference)?) 
+ .clone() + }; - state.draft4_reference_scratch.truncate(ref_start); - state.draft4_child_scratch.truncate(child_start); - return Ok(()); + let kind = if key == "$schema" { + ReferenceKind::Schema + } else { + ReferenceKind::Ref + }; + state + .external + .insert((reference.to_string(), resolved, kind)); + } } - draft.walk_borrowed_subresources(subschema, &mut |child, child_draft| { + } + + let mut idx = child_start; + while idx < child_end { + let (child, child_draft) = state.borrowed_child_scratch[idx]; + idx += 1; explore_borrowed_subtree( Arc::clone(¤t_base_uri), document_root, @@ -1408,47 +1392,18 @@ fn explore_borrowed_subtree<'r>( known_resources, resolution_cache, local_seen, - ) - })?; - return Ok(()); - } - let (id, has_anchors) = draft.id_and_has_anchors(subschema); - #[cfg(feature = "perf-observe-registry")] - { - let id_scan = match (id.is_some(), has_anchors) { - (false, false) => "none", - (true, false) => "id_only", - (false, true) => "anchor_only", - (true, true) => "id_and_anchor", - }; - crate::observe_registry!("registry.id_scan={id_scan}"); - } - if let Some(id) = id { - let original_base_uri = Arc::clone(¤t_base_uri); - current_base_uri = resolve_id(¤t_base_uri, id, resolution_cache)?; - known_resources.insert((*current_base_uri).clone()); - let insert_resource = current_base_uri != original_base_uri; - if !(is_root_entry && current_base_uri == *document_root_uri) { - insert_borrowed_discovered_index_entries( - &mut state.index_data, - ¤t_base_uri, - draft, - insert_resource, - subschema, - ); + )?; } - } else if has_anchors && !is_root_entry { - insert_borrowed_discovered_index_entries( - &mut state.index_data, - ¤t_base_uri, - draft, - false, - subschema, - ); - } + state.borrowed_reference_scratch.ref_ = None; + state.borrowed_reference_scratch.schema = None; + state.borrowed_child_scratch.truncate(child_start); + return Ok(()); + } let subschema_ptr = std::ptr::from_ref::(subschema) as usize; - if 
state.visited_schemas.insert(subschema_ptr) { + if state.visited_schemas.insert(subschema_ptr) + && probe.as_ref().is_none_or(|probe| probe.has_ref_or_schema) + { collect_external_resources( ¤t_base_uri, document_root, diff --git a/crates/jsonschema-referencing/src/specification/draft201909.rs b/crates/jsonschema-referencing/src/specification/draft201909.rs index 9e9c4130..da667c79 100644 --- a/crates/jsonschema-referencing/src/specification/draft201909.rs +++ b/crates/jsonschema-referencing/src/specification/draft201909.rs @@ -1,9 +1,99 @@ use serde_json::Value; -use crate::{specification::Draft, Error, JsonPointerNode, Resolver, ResourceRef, Segments}; +use crate::{ + specification::{BorrowedObjectProbe, BorrowedReferenceSlots, Draft}, + Error, JsonPointerNode, Resolver, ResourceRef, Segments, +}; use super::subresources::{self, SubresourceIteratorInner}; +pub(crate) fn probe_borrowed_object(contents: &Value) -> Option> { + let schema = contents.as_object()?; + + let id = schema.get("$id").and_then(Value::as_str); + let has_anchor = schema.get("$anchor").and_then(Value::as_str).is_some(); + let has_ref_or_schema = schema.get("$ref").and_then(Value::as_str).is_some() + || schema.get("$schema").and_then(Value::as_str).is_some(); + + Some(BorrowedObjectProbe { + id, + has_anchor, + has_ref_or_schema, + }) +} + +pub(crate) fn scan_borrowed_object_into_scratch<'a>( + contents: &'a Value, + draft: Draft, + references: &mut BorrowedReferenceSlots<'a>, + children: &mut Vec<(&'a Value, Draft)>, +) -> Option<()> { + let schema = contents.as_object()?; + + for (key, value) in schema { + match key.as_str() { + "$ref" => { + if let Some(reference) = value.as_str() { + references.ref_ = Some(reference); + } + } + "$schema" => { + if let Some(reference) = value.as_str() { + references.schema = Some(reference); + } + } + "additionalItems" + | "additionalProperties" + | "contains" + | "contentSchema" + | "else" + | "if" + | "not" + | "propertyNames" + | "then" + | 
"unevaluatedItems" + | "unevaluatedProperties" => { + children.push((value, draft.detect(value))); + } + "allOf" | "anyOf" | "oneOf" => { + if let Some(arr) = value.as_array() { + for item in arr { + children.push((item, draft.detect(item))); + } + } + } + "$defs" | "definitions" | "dependentSchemas" | "patternProperties" | "properties" => { + if let Some(obj) = value.as_object() { + for child_value in obj.values() { + children.push((child_value, draft.detect(child_value))); + } + } + } + "items" => match value { + Value::Array(arr) => { + for item in arr { + children.push((item, draft.detect(item))); + } + } + _ => children.push((value, draft.detect(value))), + }, + "dependencies" => { + if let Some(obj) = value.as_object() { + for child_value in obj.values() { + if !child_value.is_object() { + continue; + } + children.push((child_value, draft.detect(child_value))); + } + } + } + _ => {} + } + } + + Some(()) +} + pub(crate) fn walk_borrowed_subresources<'a, E, F>( contents: &'a Value, draft: Draft, diff --git a/crates/jsonschema-referencing/src/specification/draft4.rs b/crates/jsonschema-referencing/src/specification/draft4.rs index 93baccc5..a8642e32 100644 --- a/crates/jsonschema-referencing/src/specification/draft4.rs +++ b/crates/jsonschema-referencing/src/specification/draft4.rs @@ -1,21 +1,36 @@ use serde_json::Value; -use crate::{specification::Draft, Error, JsonPointerNode, Resolver, ResourceRef, Segments}; +use crate::{ + specification::{BorrowedObjectProbe, BorrowedReferenceSlots, Draft}, + Error, JsonPointerNode, Resolver, ResourceRef, Segments, +}; use super::subresources::{self, SubresourceIteratorInner}; -pub(crate) struct BorrowedObjectProbe<'a> { - pub(crate) id: Option<&'a str>, - pub(crate) has_anchor: bool, - pub(crate) has_ref_or_schema: bool, -} - pub(crate) fn probe_borrowed_object(contents: &Value) -> Option> { let schema = contents.as_object()?; - let raw_id = schema.get("id").and_then(Value::as_str); - let has_ref = 
schema.get("$ref").and_then(Value::as_str).is_some(); - let has_ref_or_schema = has_ref || schema.get("$schema").and_then(Value::as_str).is_some(); + let (raw_id, has_ref, has_ref_or_schema) = if schema.len() <= 3 { + let mut raw_id = None; + let mut has_ref = false; + let mut has_schema = false; + + for (key, value) in schema { + match key.as_str() { + "id" => raw_id = value.as_str(), + "$ref" => has_ref = value.is_string(), + "$schema" => has_schema = value.is_string(), + _ => {} + } + } + + (raw_id, has_ref, has_ref || has_schema) + } else { + let raw_id = schema.get("id").and_then(Value::as_str); + let has_ref = schema.get("$ref").and_then(Value::as_str).is_some(); + let has_ref_or_schema = has_ref || schema.get("$schema").and_then(Value::as_str).is_some(); + (raw_id, has_ref, has_ref_or_schema) + }; let mut has_anchor = false; if let Some(id) = raw_id { has_anchor = id.starts_with('#'); @@ -36,7 +51,7 @@ pub(crate) fn probe_borrowed_object(contents: &Value) -> Option( contents: &'a Value, draft: Draft, - references: &mut Vec<(&'a str, &'static str)>, + references: &mut BorrowedReferenceSlots<'a>, children: &mut Vec<(&'a Value, Draft)>, ) -> Option<()> { let schema = contents.as_object()?; @@ -45,12 +60,12 @@ pub(crate) fn scan_borrowed_object_into_scratch<'a>( match key.as_str() { "$ref" => { if let Some(reference) = value.as_str() { - references.push((reference, "$ref")); + references.ref_ = Some(reference); } } "$schema" => { if let Some(reference) = value.as_str() { - references.push((reference, "$schema")); + references.schema = Some(reference); } } "additionalItems" | "additionalProperties" if value.is_object() => { @@ -343,7 +358,7 @@ pub(crate) fn maybe_in_subresource<'r>( #[cfg(test)] mod tests { use super::{probe_borrowed_object, scan_borrowed_object_into_scratch}; - use crate::Draft; + use crate::{specification::BorrowedReferenceSlots, Draft}; use serde_json::json; #[test] @@ -373,20 +388,27 @@ mod tests { }, "items": {"type": "integer"} }); - let 
mut references = Vec::new(); + let mut references = BorrowedReferenceSlots::default(); let mut children = Vec::new(); scan_borrowed_object_into_scratch(&schema, Draft::Draft4, &mut references, &mut children) .expect("schema object should be scanned"); assert_eq!( - references - .iter() - .map(|(reference, key): &(&str, &'static str)| { - (key.to_string(), reference.to_string()) - }) - .collect::>(), + ( + references.ref_.map(str::to_string), + references.schema.map(str::to_string) + ), vec![("$schema".to_string(), "http://example.com/meta".to_string())] + .into_iter() + .fold((None, None), |mut acc, (key, value)| { + if key == "$ref" { + acc.0 = Some(value); + } else { + acc.1 = Some(value); + } + acc + }) ); let children: Vec<_> = children .iter() diff --git a/crates/jsonschema-referencing/src/specification/draft6.rs b/crates/jsonschema-referencing/src/specification/draft6.rs index cf077ef9..a69a63a3 100644 --- a/crates/jsonschema-referencing/src/specification/draft6.rs +++ b/crates/jsonschema-referencing/src/specification/draft6.rs @@ -1,9 +1,93 @@ use serde_json::Value; -use crate::{specification::Draft, Error, JsonPointerNode, Resolver, ResourceRef, Segments}; +use crate::{ + specification::{BorrowedObjectProbe, BorrowedReferenceSlots, Draft}, + Error, JsonPointerNode, Resolver, ResourceRef, Segments, +}; use super::subresources::{self, SubresourceIteratorInner}; +pub(crate) fn probe_borrowed_object(contents: &Value) -> Option> { + let schema = contents.as_object()?; + + let raw_id = schema.get("$id").and_then(Value::as_str); + let has_ref = schema.get("$ref").and_then(Value::as_str).is_some(); + let has_ref_or_schema = has_ref || schema.get("$schema").and_then(Value::as_str).is_some(); + let has_anchor = raw_id.is_some_and(|id| id.starts_with('#')); + let id = match raw_id { + Some(id) if !id.starts_with('#') && !has_ref => Some(id), + _ => None, + }; + + Some(BorrowedObjectProbe { + id, + has_anchor, + has_ref_or_schema, + }) +} + +pub(crate) fn 
scan_borrowed_object_into_scratch<'a>( + contents: &'a Value, + draft: Draft, + references: &mut BorrowedReferenceSlots<'a>, + children: &mut Vec<(&'a Value, Draft)>, +) -> Option<()> { + let schema = contents.as_object()?; + + for (key, value) in schema { + match key.as_str() { + "$ref" => { + if let Some(reference) = value.as_str() { + references.ref_ = Some(reference); + } + } + "$schema" => { + if let Some(reference) = value.as_str() { + references.schema = Some(reference); + } + } + "additionalItems" | "additionalProperties" | "contains" | "not" | "propertyNames" => { + children.push((value, draft.detect(value))); + } + "allOf" | "anyOf" | "oneOf" => { + if let Some(arr) = value.as_array() { + for item in arr { + children.push((item, draft.detect(item))); + } + } + } + "definitions" | "patternProperties" | "properties" => { + if let Some(obj) = value.as_object() { + for child_value in obj.values() { + children.push((child_value, draft.detect(child_value))); + } + } + } + "items" => match value { + Value::Array(arr) => { + for item in arr { + children.push((item, draft.detect(item))); + } + } + _ => children.push((value, draft.detect(value))), + }, + "dependencies" => { + if let Some(obj) = value.as_object() { + for child_value in obj.values() { + if !child_value.is_object() { + continue; + } + children.push((child_value, draft.detect(child_value))); + } + } + } + _ => {} + } + } + + Some(()) +} + pub(crate) fn walk_borrowed_subresources<'a, E, F>( contents: &'a Value, draft: Draft, diff --git a/crates/jsonschema-referencing/src/specification/draft7.rs b/crates/jsonschema-referencing/src/specification/draft7.rs index dab00b9d..cf944e93 100644 --- a/crates/jsonschema-referencing/src/specification/draft7.rs +++ b/crates/jsonschema-referencing/src/specification/draft7.rs @@ -1,9 +1,100 @@ use serde_json::Value; -use crate::{specification::Draft, Error, JsonPointerNode, Resolver, ResourceRef, Segments}; +use crate::{ + specification::{BorrowedObjectProbe, 
BorrowedReferenceSlots, Draft}, + Error, JsonPointerNode, Resolver, ResourceRef, Segments, +}; use super::subresources::{self, SubresourceIteratorInner}; +pub(crate) fn probe_borrowed_object(contents: &Value) -> Option> { + let schema = contents.as_object()?; + + let raw_id = schema.get("$id").and_then(Value::as_str); + let has_ref = schema.get("$ref").and_then(Value::as_str).is_some(); + let has_ref_or_schema = has_ref || schema.get("$schema").and_then(Value::as_str).is_some(); + let has_anchor = raw_id.is_some_and(|id| id.starts_with('#')); + let id = match raw_id { + Some(id) if !id.starts_with('#') && !has_ref => Some(id), + _ => None, + }; + + Some(BorrowedObjectProbe { + id, + has_anchor, + has_ref_or_schema, + }) +} + +pub(crate) fn scan_borrowed_object_into_scratch<'a>( + contents: &'a Value, + draft: Draft, + references: &mut BorrowedReferenceSlots<'a>, + children: &mut Vec<(&'a Value, Draft)>, +) -> Option<()> { + let schema = contents.as_object()?; + + for (key, value) in schema { + match key.as_str() { + "$ref" => { + if let Some(reference) = value.as_str() { + references.ref_ = Some(reference); + } + } + "$schema" => { + if let Some(reference) = value.as_str() { + references.schema = Some(reference); + } + } + "additionalItems" + | "additionalProperties" + | "contains" + | "else" + | "if" + | "not" + | "propertyNames" + | "then" => { + children.push((value, draft.detect(value))); + } + "allOf" | "anyOf" | "oneOf" => { + if let Some(arr) = value.as_array() { + for item in arr { + children.push((item, draft.detect(item))); + } + } + } + "definitions" | "patternProperties" | "properties" => { + if let Some(obj) = value.as_object() { + for child_value in obj.values() { + children.push((child_value, draft.detect(child_value))); + } + } + } + "items" => match value { + Value::Array(arr) => { + for item in arr { + children.push((item, draft.detect(item))); + } + } + _ => children.push((value, draft.detect(value))), + }, + "dependencies" => { + if let 
Some(obj) = value.as_object() { + for child_value in obj.values() { + if !child_value.is_object() { + continue; + } + children.push((child_value, draft.detect(child_value))); + } + } + } + _ => {} + } + } + + Some(()) +} + pub(crate) fn walk_borrowed_subresources<'a, E, F>( contents: &'a Value, draft: Draft, diff --git a/crates/jsonschema-referencing/src/specification/mod.rs b/crates/jsonschema-referencing/src/specification/mod.rs index 7c8233f1..1854baa4 100644 --- a/crates/jsonschema-referencing/src/specification/mod.rs +++ b/crates/jsonschema-referencing/src/specification/mod.rs @@ -14,6 +14,18 @@ use crate::{ Anchor, Error, JsonPointerNode, Resolver, Resource, ResourceRef, Segments, }; +pub(crate) struct BorrowedObjectProbe<'a> { + pub(crate) id: Option<&'a str>, + pub(crate) has_anchor: bool, + pub(crate) has_ref_or_schema: bool, +} + +#[derive(Default)] +pub(crate) struct BorrowedReferenceSlots<'a> { + pub(crate) ref_: Option<&'a str>, + pub(crate) schema: Option<&'a str>, +} + /// JSON Schema specification versions. 
#[non_exhaustive] #[derive(Debug, Default, PartialEq, Copy, Clone, Hash, Eq, PartialOrd, Ord)] @@ -180,26 +192,37 @@ impl Draft { } } } - pub(crate) fn probe_draft4_borrowed_object( - self, - contents: &Value, - ) -> Option> { + pub(crate) fn probe_borrowed_object(self, contents: &Value) -> Option> { match self { Draft::Draft4 => draft4::probe_borrowed_object(contents), - _ => None, + Draft::Draft6 => draft6::probe_borrowed_object(contents), + Draft::Draft7 => draft7::probe_borrowed_object(contents), + Draft::Draft201909 => draft201909::probe_borrowed_object(contents), + Draft::Draft202012 | Draft::Unknown => subresources::probe_borrowed_object(contents), } } - pub(crate) fn scan_draft4_borrowed_object_into_scratch<'a>( + pub(crate) fn scan_borrowed_object_into_scratch<'a>( self, contents: &'a Value, - references: &mut Vec<(&'a str, &'static str)>, + references: &mut BorrowedReferenceSlots<'a>, children: &mut Vec<(&'a Value, Draft)>, ) -> Option<()> { match self { Draft::Draft4 => { draft4::scan_borrowed_object_into_scratch(contents, self, references, children) } - _ => None, + Draft::Draft6 => { + draft6::scan_borrowed_object_into_scratch(contents, self, references, children) + } + Draft::Draft7 => { + draft7::scan_borrowed_object_into_scratch(contents, self, references, children) + } + Draft::Draft201909 => { + draft201909::scan_borrowed_object_into_scratch(contents, self, references, children) + } + Draft::Draft202012 | Draft::Unknown => subresources::scan_borrowed_object_into_scratch( + contents, self, references, children, + ), } } pub(crate) fn walk_owned_subresources<'a, E, F>( diff --git a/crates/jsonschema-referencing/src/specification/subresources.rs b/crates/jsonschema-referencing/src/specification/subresources.rs index eb9d72c8..1cd8c20c 100644 --- a/crates/jsonschema-referencing/src/specification/subresources.rs +++ b/crates/jsonschema-referencing/src/specification/subresources.rs @@ -4,10 +4,84 @@ use std::iter::FlatMap; use serde_json::Value; use 
crate::{ - segments::Segment, specification::Draft, Error, JsonPointerNode, Resolver, ResourceRef, - Segments, + segments::Segment, + specification::{BorrowedObjectProbe, BorrowedReferenceSlots, Draft}, + Error, JsonPointerNode, Resolver, ResourceRef, Segments, }; +pub(crate) fn probe_borrowed_object(contents: &Value) -> Option> { + let schema = contents.as_object()?; + + let id = schema.get("$id").and_then(Value::as_str); + let has_anchor = schema.get("$anchor").and_then(Value::as_str).is_some() + || schema + .get("$dynamicAnchor") + .and_then(Value::as_str) + .is_some(); + let has_ref_or_schema = schema.get("$ref").and_then(Value::as_str).is_some() + || schema.get("$schema").and_then(Value::as_str).is_some(); + + Some(BorrowedObjectProbe { + id, + has_anchor, + has_ref_or_schema, + }) +} + +pub(crate) fn scan_borrowed_object_into_scratch<'a>( + contents: &'a Value, + draft: Draft, + references: &mut BorrowedReferenceSlots<'a>, + children: &mut Vec<(&'a Value, Draft)>, +) -> Option<()> { + let schema = contents.as_object()?; + + for (key, value) in schema { + match key.as_str() { + "$ref" => { + if let Some(reference) = value.as_str() { + references.ref_ = Some(reference); + } + } + "$schema" => { + if let Some(reference) = value.as_str() { + references.schema = Some(reference); + } + } + "additionalProperties" + | "contains" + | "contentSchema" + | "else" + | "if" + | "items" + | "not" + | "propertyNames" + | "then" + | "unevaluatedItems" + | "unevaluatedProperties" => { + children.push((value, draft.detect(value))); + } + "allOf" | "anyOf" | "oneOf" | "prefixItems" => { + if let Some(arr) = value.as_array() { + for item in arr { + children.push((item, draft.detect(item))); + } + } + } + "$defs" | "definitions" | "dependentSchemas" | "patternProperties" | "properties" => { + if let Some(obj) = value.as_object() { + for child_value in obj.values() { + children.push((child_value, draft.detect(child_value))); + } + } + } + _ => {} + } + } + + Some(()) +} + 
pub(crate) fn walk_owned_subresources<'a, E, F>( contents: &'a Value, path: &JsonPointerNode<'_, '_>, From 437f7fb5d8346f4f1369d8c92e00e4478e000de3 Mon Sep 17 00:00:00 2001 From: Dmitry Dygalo Date: Wed, 18 Mar 2026 00:54:11 +0100 Subject: [PATCH 05/14] wip Signed-off-by: Dmitry Dygalo --- crates/jsonschema-referencing/Cargo.toml | 1 + crates/jsonschema-referencing/src/registry.rs | 292 ++++++------- .../src/specification/draft201909.rs | 43 +- .../src/specification/draft4.rs | 83 +--- .../src/specification/draft6.rs | 47 +-- .../src/specification/draft7.rs | 47 +-- .../src/specification/mod.rs | 390 +++++++++++++----- .../src/specification/subresources.rs | 72 ++-- 8 files changed, 512 insertions(+), 463 deletions(-) diff --git a/crates/jsonschema-referencing/Cargo.toml b/crates/jsonschema-referencing/Cargo.toml index 62eda696..8dab6ec3 100644 --- a/crates/jsonschema-referencing/Cargo.toml +++ b/crates/jsonschema-referencing/Cargo.toml @@ -29,6 +29,7 @@ percent-encoding = "2.3.1" serde_json.workspace = true hashbrown = "0.16" micromap = "0.3.0" +bytecount = { version = "0.6", features = ["runtime-dispatch-simd"] } [dev-dependencies] benchmark = { path = "../benchmark/" } diff --git a/crates/jsonschema-referencing/src/registry.rs b/crates/jsonschema-referencing/src/registry.rs index c640d719..81bf20f4 100644 --- a/crates/jsonschema-referencing/src/registry.rs +++ b/crates/jsonschema-referencing/src/registry.rs @@ -1242,11 +1242,12 @@ fn explore_borrowed_subtree<'r>( resolution_cache: &mut UriCache, local_seen: &mut LocalSeen<'r>, ) -> Result<(), Error> { + let object = subschema.as_object(); #[cfg(feature = "perf-observe-registry")] - if let Some(object) = subschema.as_object() { + if let Some(object) = object { crate::observe_registry!("registry.borrowed.object_len={}", object.len()); } - let probe = draft.probe_borrowed_object(subschema); + let probe = object.map(|schema| draft.probe_borrowed_object_map(schema)); if let Some(probe) = probe.as_ref() { 
#[cfg(feature = "perf-observe-registry")] { @@ -1283,122 +1284,122 @@ fn explore_borrowed_subtree<'r>( } } - if probe.as_ref().is_some_and(|probe| probe.has_ref_or_schema) { - let child_start = state.borrowed_child_scratch.len(); - draft - .scan_borrowed_object_into_scratch( - subschema, + if let (Some(schema), Some(probe)) = (object, probe.as_ref()) { + if probe.has_ref_or_schema { + let child_start = state.borrowed_child_scratch.len(); + draft.scan_borrowed_object_into_scratch_map( + schema, &mut state.borrowed_reference_scratch, &mut state.borrowed_child_scratch, - ) - .expect("scratch scan should succeed when probe marked the object interesting"); - let child_end = state.borrowed_child_scratch.len(); - - let subschema_ptr = std::ptr::from_ref::(subschema) as usize; - if state.visited_schemas.insert(subschema_ptr) { - for (reference, key) in [ - (state.borrowed_reference_scratch.ref_, "$ref"), - (state.borrowed_reference_scratch.schema, "$schema"), - ] { - let Some(reference) = reference else { - continue; - }; - if reference.starts_with("https://json-schema.org/draft/") - || reference.starts_with("http://json-schema.org/draft-") - || current_base_uri - .as_str() - .starts_with("https://json-schema.org/draft/") - { - if key == "$ref" { - state.refers_metaschemas = true; - } - continue; - } - if reference == "#" { - continue; - } - if reference.starts_with('#') { - if mark_local_reference(local_seen, ¤t_base_uri, reference) { - let ptr = reference.trim_start_matches('#'); - if let Some(referenced) = pointer(document_root, ptr) { - let target_draft = draft.detect(referenced); - let value_addr = std::ptr::from_ref::(referenced) as usize; - state.deferred_refs.push(( - Arc::clone(¤t_base_uri), - Arc::clone(document_root_uri), - ptr.to_string(), - target_draft, - value_addr, - )); + ); + let child_end = state.borrowed_child_scratch.len(); + + let subschema_ptr = std::ptr::from_ref::(subschema) as usize; + if state.visited_schemas.insert(subschema_ptr) { + for 
(reference, key) in [ + (state.borrowed_reference_scratch.ref_, "$ref"), + (state.borrowed_reference_scratch.schema, "$schema"), + ] { + let Some(reference) = reference else { + continue; + }; + if reference.starts_with("https://json-schema.org/draft/") + || reference.starts_with("http://json-schema.org/draft-") + || current_base_uri + .as_str() + .starts_with("https://json-schema.org/draft/") + { + if key == "$ref" { + state.refers_metaschemas = true; } + continue; } - continue; - } - if mark_reference(&mut state.seen, ¤t_base_uri, reference) { - let resolved = if current_base_uri.has_fragment() { - let mut base_without_fragment = current_base_uri.as_ref().clone(); - base_without_fragment.set_fragment(None); - - let (path, fragment) = match reference.split_once('#') { - Some((path, fragment)) => (path, Some(fragment)), - None => (reference, None), - }; - - let mut resolved = (*resolution_cache - .resolve_against(&base_without_fragment.borrow(), path)?) - .clone(); - if let Some(fragment) = fragment { - if let Some(encoded) = uri::EncodedString::new(fragment) { - resolved = resolved.with_fragment(Some(encoded)); - } else { - uri::encode_to(fragment, &mut state.scratch); - resolved = resolved.with_fragment(Some( - uri::EncodedString::new_or_panic(&state.scratch), + if reference == "#" { + continue; + } + if reference.starts_with('#') { + if mark_local_reference(local_seen, ¤t_base_uri, reference) { + let ptr = reference.trim_start_matches('#'); + if let Some(referenced) = pointer(document_root, ptr) { + let target_draft = draft.detect(referenced); + let value_addr = std::ptr::from_ref::(referenced) as usize; + state.deferred_refs.push(( + Arc::clone(¤t_base_uri), + Arc::clone(document_root_uri), + ptr.to_string(), + target_draft, + value_addr, )); - state.scratch.clear(); } } - resolved - } else { - (*resolution_cache - .resolve_against(¤t_base_uri.borrow(), reference)?) 
- .clone() - }; + continue; + } + if mark_reference(&mut state.seen, ¤t_base_uri, reference) { + let resolved = if current_base_uri.has_fragment() { + let mut base_without_fragment = current_base_uri.as_ref().clone(); + base_without_fragment.set_fragment(None); + + let (path, fragment) = match reference.split_once('#') { + Some((path, fragment)) => (path, Some(fragment)), + None => (reference, None), + }; + + let mut resolved = (*resolution_cache + .resolve_against(&base_without_fragment.borrow(), path)?) + .clone(); + if let Some(fragment) = fragment { + if let Some(encoded) = uri::EncodedString::new(fragment) { + resolved = resolved.with_fragment(Some(encoded)); + } else { + uri::encode_to(fragment, &mut state.scratch); + resolved = resolved.with_fragment(Some( + uri::EncodedString::new_or_panic(&state.scratch), + )); + state.scratch.clear(); + } + } + resolved + } else { + (*resolution_cache + .resolve_against(¤t_base_uri.borrow(), reference)?) + .clone() + }; - let kind = if key == "$schema" { - ReferenceKind::Schema - } else { - ReferenceKind::Ref - }; - state - .external - .insert((reference.to_string(), resolved, kind)); + let kind = if key == "$schema" { + ReferenceKind::Schema + } else { + ReferenceKind::Ref + }; + state + .external + .insert((reference.to_string(), resolved, kind)); + } } } - } - let mut idx = child_start; - while idx < child_end { - let (child, child_draft) = state.borrowed_child_scratch[idx]; - idx += 1; - explore_borrowed_subtree( - Arc::clone(¤t_base_uri), - document_root, - child, - child_draft, - false, - document_root_uri, - state, - known_resources, - resolution_cache, - local_seen, - )?; - } + let mut idx = child_start; + while idx < child_end { + let (child, child_draft) = state.borrowed_child_scratch[idx]; + idx += 1; + explore_borrowed_subtree( + Arc::clone(¤t_base_uri), + document_root, + child, + child_draft, + false, + document_root_uri, + state, + known_resources, + resolution_cache, + local_seen, + )?; + } - 
state.borrowed_reference_scratch.ref_ = None; - state.borrowed_reference_scratch.schema = None; - state.borrowed_child_scratch.truncate(child_start); - return Ok(()); + state.borrowed_reference_scratch.ref_ = None; + state.borrowed_reference_scratch.schema = None; + state.borrowed_child_scratch.truncate(child_start); + return Ok(()); + } } let subschema_ptr = std::ptr::from_ref::(subschema) as usize; if state.visited_schemas.insert(subschema_ptr) @@ -1420,20 +1421,24 @@ fn explore_borrowed_subtree<'r>( )?; } - draft.walk_borrowed_subresources(subschema, &mut |child, child_draft| { - explore_borrowed_subtree( - Arc::clone(¤t_base_uri), - document_root, - child, - child_draft, - false, - document_root_uri, - state, - known_resources, - resolution_cache, - local_seen, - ) - }) + if let Some(schema) = object { + draft.walk_borrowed_subresources_map(schema, &mut |child, child_draft| { + explore_borrowed_subtree( + Arc::clone(¤t_base_uri), + document_root, + child, + child_draft, + false, + document_root_uri, + state, + known_resources, + resolution_cache, + local_seen, + ) + }) + } else { + Ok(()) + } } fn process_owned_document<'a, 'r>( @@ -1516,7 +1521,10 @@ fn explore_owned_subtree<'a, 'r>( resolution_cache: &mut UriCache, local_seen: &mut LocalSeen<'a>, ) -> Result<(), Error> { - let (id, has_anchors) = draft.id_and_has_anchors(subschema); + let object = subschema.as_object(); + let (id, has_anchors) = object.map_or((None, false), |schema| { + draft.id_and_has_anchors_object(schema) + }); if let Some(id) = id { let original_base_uri = Arc::clone(¤t_base_uri); current_base_uri = resolve_id(¤t_base_uri, id, resolution_cache)?; @@ -1567,22 +1575,26 @@ fn explore_owned_subtree<'a, 'r>( )?; } - draft.walk_owned_subresources(subschema, path, &mut |child_path, child, child_draft| { - explore_owned_subtree( - Arc::clone(¤t_base_uri), - document_root, - child, - child_draft, - false, - child_path, - document_root_uri, - document, - state, - known_resources, - 
resolution_cache, - local_seen, - ) - }) + if let Some(schema) = object { + draft.walk_owned_subresources_map(schema, path, &mut |child_path, child, child_draft| { + explore_owned_subtree( + Arc::clone(¤t_base_uri), + document_root, + child, + child_draft, + false, + child_path, + document_root_uri, + document, + state, + known_resources, + resolution_cache, + local_seen, + ) + }) + } else { + Ok(()) + } } fn enqueue_fragment_entry( @@ -2231,11 +2243,7 @@ fn resolve_id( pub fn pointer<'a>(document: &'a Value, pointer: &str) -> Option<&'a Value> { crate::observe_registry!( "registry.pointer_segments={}", - pointer - .as_bytes() - .iter() - .filter(|&&byte| byte == b'/') - .count() + bytecount::count(pointer.as_bytes(), b'/') ); if pointer.is_empty() { return Some(document); diff --git a/crates/jsonschema-referencing/src/specification/draft201909.rs b/crates/jsonschema-referencing/src/specification/draft201909.rs index da667c79..52214ab5 100644 --- a/crates/jsonschema-referencing/src/specification/draft201909.rs +++ b/crates/jsonschema-referencing/src/specification/draft201909.rs @@ -1,35 +1,18 @@ -use serde_json::Value; +use serde_json::{Map, Value}; use crate::{ - specification::{BorrowedObjectProbe, BorrowedReferenceSlots, Draft}, + specification::{BorrowedReferenceSlots, Draft}, Error, JsonPointerNode, Resolver, ResourceRef, Segments, }; use super::subresources::{self, SubresourceIteratorInner}; -pub(crate) fn probe_borrowed_object(contents: &Value) -> Option> { - let schema = contents.as_object()?; - - let id = schema.get("$id").and_then(Value::as_str); - let has_anchor = schema.get("$anchor").and_then(Value::as_str).is_some(); - let has_ref_or_schema = schema.get("$ref").and_then(Value::as_str).is_some() - || schema.get("$schema").and_then(Value::as_str).is_some(); - - Some(BorrowedObjectProbe { - id, - has_anchor, - has_ref_or_schema, - }) -} - -pub(crate) fn scan_borrowed_object_into_scratch<'a>( - contents: &'a Value, +pub(crate) fn 
scan_borrowed_object_into_scratch_map<'a>( + schema: &'a Map, draft: Draft, references: &mut BorrowedReferenceSlots<'a>, children: &mut Vec<(&'a Value, Draft)>, -) -> Option<()> { - let schema = contents.as_object()?; - +) { for (key, value) in schema { match key.as_str() { "$ref" => { @@ -90,21 +73,16 @@ pub(crate) fn scan_borrowed_object_into_scratch<'a>( _ => {} } } - - Some(()) } -pub(crate) fn walk_borrowed_subresources<'a, E, F>( - contents: &'a Value, +pub(crate) fn walk_borrowed_subresources_map<'a, E, F>( + schema: &'a Map, draft: Draft, f: &mut F, ) -> Result<(), E> where F: FnMut(&'a Value, Draft) -> Result<(), E>, { - let Some(schema) = contents.as_object() else { - return Ok(()); - }; for (key, value) in schema { match key.as_str() { "additionalItems" @@ -156,8 +134,8 @@ where Ok(()) } -pub(crate) fn walk_owned_subresources<'a, E, F>( - contents: &'a Value, +pub(crate) fn walk_owned_subresources_map<'a, E, F>( + schema: &'a Map, path: &JsonPointerNode<'_, '_>, draft: Draft, f: &mut F, @@ -165,9 +143,6 @@ pub(crate) fn walk_owned_subresources<'a, E, F>( where F: FnMut(&JsonPointerNode<'_, '_>, &'a Value, Draft) -> Result<(), E>, { - let Some(schema) = contents.as_object() else { - return Ok(()); - }; for (key, value) in schema { match key.as_str() { "additionalItems" diff --git a/crates/jsonschema-referencing/src/specification/draft4.rs b/crates/jsonschema-referencing/src/specification/draft4.rs index a8642e32..c26d81de 100644 --- a/crates/jsonschema-referencing/src/specification/draft4.rs +++ b/crates/jsonschema-referencing/src/specification/draft4.rs @@ -1,61 +1,18 @@ -use serde_json::Value; +use serde_json::{Map, Value}; use crate::{ - specification::{BorrowedObjectProbe, BorrowedReferenceSlots, Draft}, + specification::{BorrowedReferenceSlots, Draft}, Error, JsonPointerNode, Resolver, ResourceRef, Segments, }; use super::subresources::{self, SubresourceIteratorInner}; -pub(crate) fn probe_borrowed_object(contents: &Value) -> Option> { - let schema = 
contents.as_object()?; - - let (raw_id, has_ref, has_ref_or_schema) = if schema.len() <= 3 { - let mut raw_id = None; - let mut has_ref = false; - let mut has_schema = false; - - for (key, value) in schema { - match key.as_str() { - "id" => raw_id = value.as_str(), - "$ref" => has_ref = value.is_string(), - "$schema" => has_schema = value.is_string(), - _ => {} - } - } - - (raw_id, has_ref, has_ref || has_schema) - } else { - let raw_id = schema.get("id").and_then(Value::as_str); - let has_ref = schema.get("$ref").and_then(Value::as_str).is_some(); - let has_ref_or_schema = has_ref || schema.get("$schema").and_then(Value::as_str).is_some(); - (raw_id, has_ref, has_ref_or_schema) - }; - let mut has_anchor = false; - if let Some(id) = raw_id { - has_anchor = id.starts_with('#'); - } - - let id = match raw_id { - Some(id) if !id.starts_with('#') && !has_ref => Some(id), - _ => None, - }; - - Some(BorrowedObjectProbe { - id, - has_anchor, - has_ref_or_schema, - }) -} - -pub(crate) fn scan_borrowed_object_into_scratch<'a>( - contents: &'a Value, +pub(crate) fn scan_borrowed_object_into_scratch_map<'a>( + schema: &'a Map, draft: Draft, references: &mut BorrowedReferenceSlots<'a>, children: &mut Vec<(&'a Value, Draft)>, -) -> Option<()> { - let schema = contents.as_object()?; - +) { for (key, value) in schema { match key.as_str() { "$ref" => { @@ -125,21 +82,16 @@ pub(crate) fn scan_borrowed_object_into_scratch<'a>( _ => {} } } - - Some(()) } -pub(crate) fn walk_borrowed_subresources<'a, E, F>( - contents: &'a Value, +pub(crate) fn walk_borrowed_subresources_map<'a, E, F>( + schema: &'a Map, draft: Draft, f: &mut F, ) -> Result<(), E> where F: FnMut(&'a Value, Draft) -> Result<(), E>, { - let Some(schema) = contents.as_object() else { - return Ok(()); - }; for (key, value) in schema { match key.as_str() { "additionalItems" | "additionalProperties" if value.is_object() => { @@ -202,8 +154,8 @@ where Ok(()) } -pub(crate) fn walk_owned_subresources<'a, E, F>( - contents: &'a 
Value, +pub(crate) fn walk_owned_subresources_map<'a, E, F>( + schema: &'a Map, path: &JsonPointerNode<'_, '_>, draft: Draft, f: &mut F, @@ -211,9 +163,6 @@ pub(crate) fn walk_owned_subresources<'a, E, F>( where F: FnMut(&JsonPointerNode<'_, '_>, &'a Value, Draft) -> Result<(), E>, { - let Some(schema) = contents.as_object() else { - return Ok(()); - }; for (key, value) in schema { match key.as_str() { "additionalItems" | "additionalProperties" if value.is_object() => { @@ -357,7 +306,6 @@ pub(crate) fn maybe_in_subresource<'r>( #[cfg(test)] mod tests { - use super::{probe_borrowed_object, scan_borrowed_object_into_scratch}; use crate::{specification::BorrowedReferenceSlots, Draft}; use serde_json::json; @@ -371,7 +319,11 @@ mod tests { }, "items": {"type": "integer"} }); - let analysis = probe_borrowed_object(&schema).expect("schema object should be analyzed"); + let analysis = Draft::Draft4.probe_borrowed_object_map( + schema + .as_object() + .expect("schema object should be analyzed"), + ); assert_eq!(analysis.id, Some("http://example.com/node")); assert!(!analysis.has_anchor); @@ -391,8 +343,11 @@ mod tests { let mut references = BorrowedReferenceSlots::default(); let mut children = Vec::new(); - scan_borrowed_object_into_scratch(&schema, Draft::Draft4, &mut references, &mut children) - .expect("schema object should be scanned"); + Draft::Draft4.scan_borrowed_object_into_scratch_map( + schema.as_object().expect("schema object should be scanned"), + &mut references, + &mut children, + ); assert_eq!( ( diff --git a/crates/jsonschema-referencing/src/specification/draft6.rs b/crates/jsonschema-referencing/src/specification/draft6.rs index a69a63a3..5c66e419 100644 --- a/crates/jsonschema-referencing/src/specification/draft6.rs +++ b/crates/jsonschema-referencing/src/specification/draft6.rs @@ -1,39 +1,18 @@ -use serde_json::Value; +use serde_json::{Map, Value}; use crate::{ - specification::{BorrowedObjectProbe, BorrowedReferenceSlots, Draft}, + 
specification::{BorrowedReferenceSlots, Draft}, Error, JsonPointerNode, Resolver, ResourceRef, Segments, }; use super::subresources::{self, SubresourceIteratorInner}; -pub(crate) fn probe_borrowed_object(contents: &Value) -> Option> { - let schema = contents.as_object()?; - - let raw_id = schema.get("$id").and_then(Value::as_str); - let has_ref = schema.get("$ref").and_then(Value::as_str).is_some(); - let has_ref_or_schema = has_ref || schema.get("$schema").and_then(Value::as_str).is_some(); - let has_anchor = raw_id.is_some_and(|id| id.starts_with('#')); - let id = match raw_id { - Some(id) if !id.starts_with('#') && !has_ref => Some(id), - _ => None, - }; - - Some(BorrowedObjectProbe { - id, - has_anchor, - has_ref_or_schema, - }) -} - -pub(crate) fn scan_borrowed_object_into_scratch<'a>( - contents: &'a Value, +pub(crate) fn scan_borrowed_object_into_scratch_map<'a>( + schema: &'a Map, draft: Draft, references: &mut BorrowedReferenceSlots<'a>, children: &mut Vec<(&'a Value, Draft)>, -) -> Option<()> { - let schema = contents.as_object()?; - +) { for (key, value) in schema { match key.as_str() { "$ref" => { @@ -84,21 +63,16 @@ pub(crate) fn scan_borrowed_object_into_scratch<'a>( _ => {} } } - - Some(()) } -pub(crate) fn walk_borrowed_subresources<'a, E, F>( - contents: &'a Value, +pub(crate) fn walk_borrowed_subresources_map<'a, E, F>( + schema: &'a Map, draft: Draft, f: &mut F, ) -> Result<(), E> where F: FnMut(&'a Value, Draft) -> Result<(), E>, { - let Some(schema) = contents.as_object() else { - return Ok(()); - }; for (key, value) in schema { match key.as_str() { "additionalItems" | "additionalProperties" | "contains" | "not" | "propertyNames" => { @@ -142,8 +116,8 @@ where Ok(()) } -pub(crate) fn walk_owned_subresources<'a, E, F>( - contents: &'a Value, +pub(crate) fn walk_owned_subresources_map<'a, E, F>( + schema: &'a Map, path: &JsonPointerNode<'_, '_>, draft: Draft, f: &mut F, @@ -151,9 +125,6 @@ pub(crate) fn walk_owned_subresources<'a, E, F>( where F: 
FnMut(&JsonPointerNode<'_, '_>, &'a Value, Draft) -> Result<(), E>, { - let Some(schema) = contents.as_object() else { - return Ok(()); - }; for (key, value) in schema { match key.as_str() { "additionalItems" | "additionalProperties" | "contains" | "not" | "propertyNames" => { diff --git a/crates/jsonschema-referencing/src/specification/draft7.rs b/crates/jsonschema-referencing/src/specification/draft7.rs index cf944e93..fa5de71a 100644 --- a/crates/jsonschema-referencing/src/specification/draft7.rs +++ b/crates/jsonschema-referencing/src/specification/draft7.rs @@ -1,39 +1,18 @@ -use serde_json::Value; +use serde_json::{Map, Value}; use crate::{ - specification::{BorrowedObjectProbe, BorrowedReferenceSlots, Draft}, + specification::{BorrowedReferenceSlots, Draft}, Error, JsonPointerNode, Resolver, ResourceRef, Segments, }; use super::subresources::{self, SubresourceIteratorInner}; -pub(crate) fn probe_borrowed_object(contents: &Value) -> Option> { - let schema = contents.as_object()?; - - let raw_id = schema.get("$id").and_then(Value::as_str); - let has_ref = schema.get("$ref").and_then(Value::as_str).is_some(); - let has_ref_or_schema = has_ref || schema.get("$schema").and_then(Value::as_str).is_some(); - let has_anchor = raw_id.is_some_and(|id| id.starts_with('#')); - let id = match raw_id { - Some(id) if !id.starts_with('#') && !has_ref => Some(id), - _ => None, - }; - - Some(BorrowedObjectProbe { - id, - has_anchor, - has_ref_or_schema, - }) -} - -pub(crate) fn scan_borrowed_object_into_scratch<'a>( - contents: &'a Value, +pub(crate) fn scan_borrowed_object_into_scratch_map<'a>( + schema: &'a Map, draft: Draft, references: &mut BorrowedReferenceSlots<'a>, children: &mut Vec<(&'a Value, Draft)>, -) -> Option<()> { - let schema = contents.as_object()?; - +) { for (key, value) in schema { match key.as_str() { "$ref" => { @@ -91,21 +70,16 @@ pub(crate) fn scan_borrowed_object_into_scratch<'a>( _ => {} } } - - Some(()) } -pub(crate) fn 
walk_borrowed_subresources<'a, E, F>( - contents: &'a Value, +pub(crate) fn walk_borrowed_subresources_map<'a, E, F>( + schema: &'a Map, draft: Draft, f: &mut F, ) -> Result<(), E> where F: FnMut(&'a Value, Draft) -> Result<(), E>, { - let Some(schema) = contents.as_object() else { - return Ok(()); - }; for (key, value) in schema { match key.as_str() { "additionalItems" @@ -154,8 +128,8 @@ where Ok(()) } -pub(crate) fn walk_owned_subresources<'a, E, F>( - contents: &'a Value, +pub(crate) fn walk_owned_subresources_map<'a, E, F>( + schema: &'a Map, path: &JsonPointerNode<'_, '_>, draft: Draft, f: &mut F, @@ -163,9 +137,6 @@ pub(crate) fn walk_owned_subresources<'a, E, F>( where F: FnMut(&JsonPointerNode<'_, '_>, &'a Value, Draft) -> Result<(), E>, { - let Some(schema) = contents.as_object() else { - return Ok(()); - }; for (key, value) in schema { match key.as_str() { "additionalItems" diff --git a/crates/jsonschema-referencing/src/specification/mod.rs b/crates/jsonschema-referencing/src/specification/mod.rs index 1854baa4..be244262 100644 --- a/crates/jsonschema-referencing/src/specification/mod.rs +++ b/crates/jsonschema-referencing/src/specification/mod.rs @@ -1,4 +1,4 @@ -use serde_json::Value; +use serde_json::{Map, Value}; use subresources::SubresourceIterator; mod draft201909; @@ -20,6 +20,21 @@ pub(crate) struct BorrowedObjectProbe<'a> { pub(crate) has_ref_or_schema: bool, } +#[inline] +pub(crate) fn has_ref_or_schema_object(schema: &Map) -> bool { + if schema.len() <= 3 { + for (key, value) in schema { + if (key == "$ref" || key == "$schema") && value.is_string() { + return true; + } + } + false + } else { + schema.get("$ref").and_then(Value::as_str).is_some() + || schema.get("$schema").and_then(Value::as_str).is_some() + } +} + #[derive(Default)] pub(crate) struct BorrowedReferenceSlots<'a> { pub(crate) ref_: Option<&'a str>, @@ -104,59 +119,29 @@ impl Draft { } } - /// Returns `(id, has_any_anchor)` when both pieces of information are needed - /// during 
registry preparation. #[inline] - pub(crate) fn id_and_has_anchors(self, contents: &Value) -> (Option<&str>, bool) { - let Some(obj) = contents.as_object() else { - return (None, false); - }; + pub(crate) fn id_and_has_anchors_object( + self, + obj: &Map, + ) -> (Option<&str>, bool) { match self { - Draft::Draft4 => { - if obj.len() <= 3 { - scan_legacy_id_small(obj) - } else { - let raw_id = obj.get("id").and_then(Value::as_str); - let is_anchor = raw_id.is_some_and(|id| id.starts_with('#')); - let plain_id = match (is_anchor, obj.contains_key("$ref")) { - (false, false) => raw_id, - _ => None, - }; - (plain_id, is_anchor) - } - } - Draft::Draft6 | Draft::Draft7 => { - if obj.len() <= 3 { - scan_legacy_dollar_id_small(obj) - } else { - let raw_id = obj.get("$id").and_then(Value::as_str); - let is_anchor = raw_id.is_some_and(|id| id.starts_with('#')); - let plain_id = match (is_anchor, obj.contains_key("$ref")) { - (false, false) => raw_id, - _ => None, - }; - (plain_id, is_anchor) - } - } - Draft::Draft201909 => { - if obj.len() <= 2 { - scan_id_and_anchor_small(obj) - } else { - let id = obj.get("$id").and_then(Value::as_str); - let has_anchor = obj.get("$anchor").and_then(Value::as_str).is_some(); - (id, has_anchor) - } - } - Draft::Draft202012 | Draft::Unknown => { - if obj.len() <= 3 { - scan_id_and_any_anchor_small(obj) - } else { - let id = obj.get("$id").and_then(Value::as_str); - let has_anchor = obj.get("$anchor").and_then(Value::as_str).is_some() - || obj.get("$dynamicAnchor").and_then(Value::as_str).is_some(); - (id, has_anchor) - } - } + Draft::Draft4 => id_and_has_legacy_id_object(obj), + Draft::Draft6 | Draft::Draft7 => id_and_has_legacy_dollar_id_object(obj), + Draft::Draft201909 => id_and_has_id_and_anchor_object(obj), + Draft::Draft202012 | Draft::Unknown => id_and_has_id_and_any_anchor_object(obj), + } + } + + #[inline] + pub(crate) fn probe_borrowed_object_map( + self, + obj: &Map, + ) -> BorrowedObjectProbe<'_> { + match self { + Draft::Draft4 
=> analyze_legacy_id_object(obj), + Draft::Draft6 | Draft::Draft7 => analyze_legacy_dollar_id_object(obj), + Draft::Draft201909 => analyze_id_and_anchor_object(obj), + Draft::Draft202012 | Draft::Unknown => analyze_id_and_any_anchor_object(obj), } } pub fn subresources_of(self, contents: &Value) -> impl Iterator { @@ -174,60 +159,53 @@ impl Draft { None => SubresourceIterator::Empty, } } - pub(crate) fn walk_borrowed_subresources<'a, E, F>( + pub(crate) fn scan_borrowed_object_into_scratch_map<'a>( self, - contents: &'a Value, - f: &mut F, - ) -> Result<(), E> - where - F: FnMut(&'a Value, Draft) -> Result<(), E>, - { - match self { - Draft::Draft4 => draft4::walk_borrowed_subresources(contents, self, f), - Draft::Draft6 => draft6::walk_borrowed_subresources(contents, self, f), - Draft::Draft7 => draft7::walk_borrowed_subresources(contents, self, f), - Draft::Draft201909 => draft201909::walk_borrowed_subresources(contents, self, f), - Draft::Draft202012 | Draft::Unknown => { - subresources::walk_borrowed_subresources(contents, self, f) - } - } - } - pub(crate) fn probe_borrowed_object(self, contents: &Value) -> Option> { - match self { - Draft::Draft4 => draft4::probe_borrowed_object(contents), - Draft::Draft6 => draft6::probe_borrowed_object(contents), - Draft::Draft7 => draft7::probe_borrowed_object(contents), - Draft::Draft201909 => draft201909::probe_borrowed_object(contents), - Draft::Draft202012 | Draft::Unknown => subresources::probe_borrowed_object(contents), - } - } - pub(crate) fn scan_borrowed_object_into_scratch<'a>( - self, - contents: &'a Value, + contents: &'a Map, references: &mut BorrowedReferenceSlots<'a>, children: &mut Vec<(&'a Value, Draft)>, - ) -> Option<()> { + ) { match self { Draft::Draft4 => { - draft4::scan_borrowed_object_into_scratch(contents, self, references, children) + draft4::scan_borrowed_object_into_scratch_map(contents, self, references, children); } Draft::Draft6 => { - draft6::scan_borrowed_object_into_scratch(contents, self, 
references, children) + draft6::scan_borrowed_object_into_scratch_map(contents, self, references, children); } Draft::Draft7 => { - draft7::scan_borrowed_object_into_scratch(contents, self, references, children) + draft7::scan_borrowed_object_into_scratch_map(contents, self, references, children); } - Draft::Draft201909 => { - draft201909::scan_borrowed_object_into_scratch(contents, self, references, children) - } - Draft::Draft202012 | Draft::Unknown => subresources::scan_borrowed_object_into_scratch( + Draft::Draft201909 => draft201909::scan_borrowed_object_into_scratch_map( contents, self, references, children, ), + Draft::Draft202012 | Draft::Unknown => { + subresources::scan_borrowed_object_into_scratch_map( + contents, self, references, children, + ) + } } } - pub(crate) fn walk_owned_subresources<'a, E, F>( + pub(crate) fn walk_borrowed_subresources_map<'a, E, F>( self, - contents: &'a Value, + contents: &'a Map, + f: &mut F, + ) -> Result<(), E> + where + F: FnMut(&'a Value, Draft) -> Result<(), E>, + { + match self { + Draft::Draft4 => draft4::walk_borrowed_subresources_map(contents, self, f), + Draft::Draft6 => draft6::walk_borrowed_subresources_map(contents, self, f), + Draft::Draft7 => draft7::walk_borrowed_subresources_map(contents, self, f), + Draft::Draft201909 => draft201909::walk_borrowed_subresources_map(contents, self, f), + Draft::Draft202012 | Draft::Unknown => { + subresources::walk_borrowed_subresources_map(contents, self, f) + } + } + } + pub(crate) fn walk_owned_subresources_map<'a, E, F>( + self, + contents: &'a Map, path: &JsonPointerNode<'_, '_>, f: &mut F, ) -> Result<(), E> @@ -235,12 +213,12 @@ impl Draft { F: FnMut(&JsonPointerNode<'_, '_>, &'a Value, Draft) -> Result<(), E>, { match self { - Draft::Draft4 => draft4::walk_owned_subresources(contents, path, self, f), - Draft::Draft6 => draft6::walk_owned_subresources(contents, path, self, f), - Draft::Draft7 => draft7::walk_owned_subresources(contents, path, self, f), - 
Draft::Draft201909 => draft201909::walk_owned_subresources(contents, path, self, f), + Draft::Draft4 => draft4::walk_owned_subresources_map(contents, path, self, f), + Draft::Draft6 => draft6::walk_owned_subresources_map(contents, path, self, f), + Draft::Draft7 => draft7::walk_owned_subresources_map(contents, path, self, f), + Draft::Draft201909 => draft201909::walk_owned_subresources_map(contents, path, self, f), Draft::Draft202012 | Draft::Unknown => { - subresources::walk_owned_subresources(contents, path, self, f) + subresources::walk_owned_subresources_map(contents, path, self, f) } } } @@ -366,7 +344,42 @@ impl Draft { } } -fn scan_legacy_id_small(obj: &serde_json::Map) -> (Option<&str>, bool) { +fn id_and_has_legacy_id_object(obj: &Map) -> (Option<&str>, bool) { + if obj.len() <= 3 { + scan_legacy_id_small(obj) + } else { + let raw_id = obj.get("id").and_then(Value::as_str); + let is_anchor = raw_id.is_some_and(|id| id.starts_with('#')); + let plain_id = match (is_anchor, obj.contains_key("$ref")) { + (false, false) => raw_id, + _ => None, + }; + (plain_id, is_anchor) + } +} + +fn analyze_legacy_id_object(obj: &Map) -> BorrowedObjectProbe<'_> { + if obj.len() <= 3 { + return scan_legacy_id_probe_small(obj); + } + + let raw_id = obj.get("id").and_then(Value::as_str); + let has_ref = obj.get("$ref").and_then(Value::as_str).is_some(); + let has_ref_or_schema = has_ref || obj.get("$schema").and_then(Value::as_str).is_some(); + let has_anchor = raw_id.is_some_and(|id| id.starts_with('#')); + let id = match raw_id { + Some(id) if !has_anchor && !has_ref => Some(id), + _ => None, + }; + + BorrowedObjectProbe { + id, + has_anchor, + has_ref_or_schema, + } +} + +fn scan_legacy_id_small(obj: &Map) -> (Option<&str>, bool) { let mut raw_id = None; let mut has_ref = false; @@ -386,7 +399,69 @@ fn scan_legacy_id_small(obj: &serde_json::Map) -> (Option<&str>, (plain_id, is_anchor) } -fn scan_legacy_dollar_id_small(obj: &serde_json::Map) -> (Option<&str>, bool) { +fn 
scan_legacy_id_probe_small(obj: &Map) -> BorrowedObjectProbe<'_> { + let mut raw_id = None; + let mut has_ref = false; + let mut has_schema = false; + + for (key, value) in obj { + match key.as_str() { + "id" => raw_id = value.as_str(), + "$ref" => has_ref = value.is_string(), + "$schema" => has_schema = value.is_string(), + _ => {} + } + } + + let has_anchor = raw_id.is_some_and(|id| id.starts_with('#')); + let id = match raw_id { + Some(id) if !has_anchor && !has_ref => Some(id), + _ => None, + }; + + BorrowedObjectProbe { + id, + has_anchor, + has_ref_or_schema: has_ref || has_schema, + } +} + +fn id_and_has_legacy_dollar_id_object(obj: &Map) -> (Option<&str>, bool) { + if obj.len() <= 3 { + scan_legacy_dollar_id_small(obj) + } else { + let raw_id = obj.get("$id").and_then(Value::as_str); + let is_anchor = raw_id.is_some_and(|id| id.starts_with('#')); + let plain_id = match (is_anchor, obj.contains_key("$ref")) { + (false, false) => raw_id, + _ => None, + }; + (plain_id, is_anchor) + } +} + +fn analyze_legacy_dollar_id_object(obj: &Map) -> BorrowedObjectProbe<'_> { + if obj.len() <= 3 { + return scan_legacy_dollar_id_probe_small(obj); + } + + let raw_id = obj.get("$id").and_then(Value::as_str); + let has_ref = obj.get("$ref").and_then(Value::as_str).is_some(); + let has_ref_or_schema = has_ref || obj.get("$schema").and_then(Value::as_str).is_some(); + let has_anchor = raw_id.is_some_and(|id| id.starts_with('#')); + let id = match raw_id { + Some(id) if !has_anchor && !has_ref => Some(id), + _ => None, + }; + + BorrowedObjectProbe { + id, + has_anchor, + has_ref_or_schema, + } +} + +fn scan_legacy_dollar_id_small(obj: &Map) -> (Option<&str>, bool) { let mut raw_id = None; let mut has_ref = false; @@ -406,7 +481,56 @@ fn scan_legacy_dollar_id_small(obj: &serde_json::Map) -> (Option< (plain_id, is_anchor) } -fn scan_id_and_anchor_small(obj: &serde_json::Map) -> (Option<&str>, bool) { +fn scan_legacy_dollar_id_probe_small(obj: &Map) -> BorrowedObjectProbe<'_> { + 
let mut raw_id = None; + let mut has_ref = false; + let mut has_schema = false; + + for (key, value) in obj { + match key.as_str() { + "$id" => raw_id = value.as_str(), + "$ref" => has_ref = value.is_string(), + "$schema" => has_schema = value.is_string(), + _ => {} + } + } + + let has_anchor = raw_id.is_some_and(|id| id.starts_with('#')); + let id = match raw_id { + Some(id) if !has_anchor && !has_ref => Some(id), + _ => None, + }; + + BorrowedObjectProbe { + id, + has_anchor, + has_ref_or_schema: has_ref || has_schema, + } +} + +fn id_and_has_id_and_anchor_object(obj: &Map) -> (Option<&str>, bool) { + if obj.len() <= 2 { + scan_id_and_anchor_small(obj) + } else { + let id = obj.get("$id").and_then(Value::as_str); + let has_anchor = obj.get("$anchor").and_then(Value::as_str).is_some(); + (id, has_anchor) + } +} + +fn analyze_id_and_anchor_object(obj: &Map) -> BorrowedObjectProbe<'_> { + if obj.len() <= 2 { + return scan_id_and_anchor_probe_small(obj); + } + + BorrowedObjectProbe { + id: obj.get("$id").and_then(Value::as_str), + has_anchor: obj.get("$anchor").and_then(Value::as_str).is_some(), + has_ref_or_schema: has_ref_or_schema_object(obj), + } +} + +fn scan_id_and_anchor_small(obj: &Map) -> (Option<&str>, bool) { let mut id = None; let mut has_anchor = false; @@ -421,7 +545,52 @@ fn scan_id_and_anchor_small(obj: &serde_json::Map) -> (Option<&st (id, has_anchor) } -fn scan_id_and_any_anchor_small(obj: &serde_json::Map) -> (Option<&str>, bool) { +fn scan_id_and_anchor_probe_small(obj: &Map) -> BorrowedObjectProbe<'_> { + let mut id = None; + let mut has_anchor = false; + let mut has_ref_or_schema = false; + + for (key, value) in obj { + match key.as_str() { + "$id" => id = value.as_str(), + "$anchor" => has_anchor |= value.as_str().is_some(), + "$ref" | "$schema" => has_ref_or_schema |= value.is_string(), + _ => {} + } + } + + BorrowedObjectProbe { + id, + has_anchor, + has_ref_or_schema, + } +} + +fn id_and_has_id_and_any_anchor_object(obj: &Map) -> 
(Option<&str>, bool) { + if obj.len() <= 3 { + scan_id_and_any_anchor_small(obj) + } else { + let id = obj.get("$id").and_then(Value::as_str); + let has_anchor = obj.get("$anchor").and_then(Value::as_str).is_some() + || obj.get("$dynamicAnchor").and_then(Value::as_str).is_some(); + (id, has_anchor) + } +} + +fn analyze_id_and_any_anchor_object(obj: &Map) -> BorrowedObjectProbe<'_> { + if obj.len() <= 3 { + return scan_id_and_any_anchor_probe_small(obj); + } + + BorrowedObjectProbe { + id: obj.get("$id").and_then(Value::as_str), + has_anchor: obj.get("$anchor").and_then(Value::as_str).is_some() + || obj.get("$dynamicAnchor").and_then(Value::as_str).is_some(), + has_ref_or_schema: has_ref_or_schema_object(obj), + } +} + +fn scan_id_and_any_anchor_small(obj: &Map) -> (Option<&str>, bool) { let mut id = None; let mut has_anchor = false; @@ -436,6 +605,27 @@ fn scan_id_and_any_anchor_small(obj: &serde_json::Map) -> (Option (id, has_anchor) } +fn scan_id_and_any_anchor_probe_small(obj: &Map) -> BorrowedObjectProbe<'_> { + let mut id = None; + let mut has_anchor = false; + let mut has_ref_or_schema = false; + + for (key, value) in obj { + match key.as_str() { + "$id" => id = value.as_str(), + "$anchor" | "$dynamicAnchor" => has_anchor |= value.as_str().is_some(), + "$ref" | "$schema" => has_ref_or_schema |= value.is_string(), + _ => {} + } + } + + BorrowedObjectProbe { + id, + has_anchor, + has_ref_or_schema, + } +} + #[cfg(test)] mod tests { use crate::Draft; diff --git a/crates/jsonschema-referencing/src/specification/subresources.rs b/crates/jsonschema-referencing/src/specification/subresources.rs index 1cd8c20c..28e2e4e5 100644 --- a/crates/jsonschema-referencing/src/specification/subresources.rs +++ b/crates/jsonschema-referencing/src/specification/subresources.rs @@ -1,41 +1,20 @@ use core::slice; use std::iter::FlatMap; -use serde_json::Value; +use serde_json::{Map, Value}; use crate::{ segments::Segment, - specification::{BorrowedObjectProbe, 
BorrowedReferenceSlots, Draft}, + specification::{BorrowedReferenceSlots, Draft}, Error, JsonPointerNode, Resolver, ResourceRef, Segments, }; -pub(crate) fn probe_borrowed_object(contents: &Value) -> Option> { - let schema = contents.as_object()?; - - let id = schema.get("$id").and_then(Value::as_str); - let has_anchor = schema.get("$anchor").and_then(Value::as_str).is_some() - || schema - .get("$dynamicAnchor") - .and_then(Value::as_str) - .is_some(); - let has_ref_or_schema = schema.get("$ref").and_then(Value::as_str).is_some() - || schema.get("$schema").and_then(Value::as_str).is_some(); - - Some(BorrowedObjectProbe { - id, - has_anchor, - has_ref_or_schema, - }) -} - -pub(crate) fn scan_borrowed_object_into_scratch<'a>( - contents: &'a Value, +pub(crate) fn scan_borrowed_object_into_scratch_map<'a>( + schema: &'a Map, draft: Draft, references: &mut BorrowedReferenceSlots<'a>, children: &mut Vec<(&'a Value, Draft)>, -) -> Option<()> { - let schema = contents.as_object()?; - +) { for (key, value) in schema { match key.as_str() { "$ref" => { @@ -78,12 +57,10 @@ pub(crate) fn scan_borrowed_object_into_scratch<'a>( _ => {} } } - - Some(()) } -pub(crate) fn walk_owned_subresources<'a, E, F>( - contents: &'a Value, +pub(crate) fn walk_owned_subresources_map<'a, E, F>( + schema: &'a Map, path: &JsonPointerNode<'_, '_>, draft: Draft, f: &mut F, @@ -91,9 +68,6 @@ pub(crate) fn walk_owned_subresources<'a, E, F>( where F: FnMut(&JsonPointerNode<'_, '_>, &'a Value, Draft) -> Result<(), E>, { - let Some(schema) = contents.as_object() else { - return Ok(()); - }; for (key, value) in schema { match key.as_str() { "additionalProperties" @@ -134,17 +108,14 @@ where Ok(()) } -pub(crate) fn walk_borrowed_subresources<'a, E, F>( - contents: &'a Value, +pub(crate) fn walk_borrowed_subresources_map<'a, E, F>( + schema: &'a Map, draft: Draft, f: &mut F, ) -> Result<(), E> where F: FnMut(&'a Value, Draft) -> Result<(), E>, { - let Some(schema) = contents.as_object() else { - return 
Ok(()); - }; for (key, value) in schema { match key.as_str() { "additionalProperties" @@ -531,10 +502,13 @@ mod tests { let mut seen = Vec::new(); Draft::Draft202012 - .walk_borrowed_subresources(&schema, &mut |subschema, draft| { - seen.push((subschema.clone(), draft)); - Ok::<(), ()>(()) - }) + .walk_borrowed_subresources_map( + schema.as_object().expect("schema object should be walked"), + &mut |subschema, draft| { + seen.push((subschema.clone(), draft)); + Ok::<(), ()>(()) + }, + ) .unwrap(); assert_eq!(seen, expected); @@ -551,11 +525,15 @@ mod tests { let mut seen = Vec::new(); Draft::Draft202012 - .walk_owned_subresources(&schema, &root, &mut |path, subschema, draft| { - let pointer = crate::OwnedJsonPointer::from(path); - seen.push((pointer.as_str().to_string(), subschema.clone(), draft)); - Ok::<(), ()>(()) - }) + .walk_owned_subresources_map( + schema.as_object().expect("schema object should be walked"), + &root, + &mut |path, subschema, draft| { + let pointer = crate::OwnedJsonPointer::from(path); + seen.push((pointer.as_str().to_string(), subschema.clone(), draft)); + Ok::<(), ()>(()) + }, + ) .unwrap(); assert_eq!( From b1f418cd4e7f67271c763d4141f75e2d773a09ee Mon Sep 17 00:00:00 2001 From: Dmitry Dygalo Date: Wed, 18 Mar 2026 09:56:56 +0100 Subject: [PATCH 06/14] wip Signed-off-by: Dmitry Dygalo --- crates/jsonschema-referencing/src/registry.rs | 290 ++++++++++++++++-- .../src/specification/draft201909.rs | 155 +++++++++- .../src/specification/draft4.rs | 168 +++++++++- .../src/specification/draft6.rs | 146 ++++++++- .../src/specification/draft7.rs | 158 +++++++++- .../src/specification/mod.rs | 229 ++++++-------- .../src/specification/subresources.rs | 116 ++++++- profiler/Justfile | 2 + profiler/src/main.rs | 73 ++++- 9 files changed, 1168 insertions(+), 169 deletions(-) diff --git a/crates/jsonschema-referencing/src/registry.rs b/crates/jsonschema-referencing/src/registry.rs index 81bf20f4..4ceb8f49 100644 --- 
a/crates/jsonschema-referencing/src/registry.rs +++ b/crates/jsonschema-referencing/src/registry.rs @@ -858,6 +858,10 @@ fn build_prepared_index_for_documents<'a>( )?; } else { let mut local_seen = LocalSeen::new(); + let mut owned_reference_scratch = + crate::specification::BorrowedReferenceSlots::default(); + let mut owned_child_scratch: Vec> = + Vec::new(); process_owned_document( Arc::clone(doc_uri), doc_uri, @@ -867,6 +871,8 @@ fn build_prepared_index_for_documents<'a>( &mut state, &mut known_resources, resolution_cache, + &mut owned_reference_scratch, + &mut owned_child_scratch, &mut local_seen, )?; } @@ -1179,6 +1185,8 @@ fn process_queue<'r>( continue; } let mut document_local_seen = LocalSeen::new(); + let mut owned_reference_scratch = crate::specification::BorrowedReferenceSlots::default(); + let mut owned_child_scratch = Vec::new(); process_owned_document( base, &document_root_uri, @@ -1188,6 +1196,8 @@ fn process_queue<'r>( state, known_resources, resolution_cache, + &mut owned_reference_scratch, + &mut owned_child_scratch, &mut document_local_seen, )?; } @@ -1450,6 +1460,8 @@ fn process_owned_document<'a, 'r>( state: &mut ProcessingState<'r>, known_resources: &mut KnownResources, resolution_cache: &mut UriCache, + owned_reference_scratch: &mut crate::specification::BorrowedReferenceSlots<'a>, + owned_child_scratch: &mut Vec>, local_seen: &mut LocalSeen<'a>, ) -> Result<(), Error> { let document_root = document.contents(); @@ -1475,6 +1487,8 @@ fn process_owned_document<'a, 'r>( state, known_resources, resolution_cache, + owned_reference_scratch, + owned_child_scratch, local_seen, ) }) @@ -1519,12 +1533,20 @@ fn explore_owned_subtree<'a, 'r>( state: &mut ProcessingState<'r>, known_resources: &mut KnownResources, resolution_cache: &mut UriCache, + owned_reference_scratch: &mut crate::specification::BorrowedReferenceSlots<'a>, + owned_child_scratch: &mut Vec>, local_seen: &mut LocalSeen<'a>, ) -> Result<(), Error> { let object = subschema.as_object(); - 
let (id, has_anchors) = object.map_or((None, false), |schema| { - draft.id_and_has_anchors_object(schema) - }); + #[cfg(feature = "perf-observe-registry")] + if let Some(object) = object { + crate::observe_registry!("registry.owned.object_len={}", object.len()); + } + let child_start = owned_child_scratch.len(); + let gate = object.map(|schema| draft.owned_object_gate_map(schema)); + let (id, has_anchors) = gate + .as_ref() + .map_or((None, false), |gate| (gate.id, gate.has_anchor)); if let Some(id) = id { let original_base_uri = Arc::clone(¤t_base_uri); current_base_uri = resolve_id(¤t_base_uri, id, resolution_cache)?; @@ -1557,22 +1579,123 @@ fn explore_owned_subtree<'a, 'r>( ); } - let subschema_ptr = std::ptr::from_ref::(subschema) as usize; - if state.visited_schemas.insert(subschema_ptr) { - collect_external_resources( - ¤t_base_uri, - document_root, - subschema, - &mut state.external, - &mut state.seen, - resolution_cache, - &mut state.scratch, - &mut state.refers_metaschemas, - draft, - document_root_uri, - &mut state.deferred_refs, - local_seen, - )?; + if let (Some(schema), Some(gate)) = (object, gate.as_ref()) { + if gate.ref_.is_some() || gate.schema.is_some() { + #[cfg(feature = "perf-observe-registry")] + { + let kind = if schema.len() == 1 { + "ref_only_leaf" + } else if gate.has_children { + "ref_with_children" + } else { + "ref_no_children" + }; + crate::observe_registry!("registry.owned.gate={kind}"); + } + if schema.len() == 1 { + owned_reference_scratch.ref_ = gate.ref_; + owned_reference_scratch.schema = gate.schema; + + let subschema_ptr = std::ptr::from_ref::(subschema) as usize; + if state.visited_schemas.insert(subschema_ptr) { + collect_external_resources_from_slots( + ¤t_base_uri, + document_root, + owned_reference_scratch, + &mut state.external, + &mut state.seen, + resolution_cache, + &mut state.scratch, + &mut state.refers_metaschemas, + draft, + document_root_uri, + &mut state.deferred_refs, + local_seen, + )?; + } + 
owned_reference_scratch.ref_ = None; + owned_reference_scratch.schema = None; + return Ok(()); + } + + if gate.has_children { + let (_, _) = draft.scan_owned_object_into_scratch_map( + schema, + owned_reference_scratch, + owned_child_scratch, + ); + let subschema_ptr = std::ptr::from_ref::(subschema) as usize; + if state.visited_schemas.insert(subschema_ptr) { + collect_external_resources_from_slots( + ¤t_base_uri, + document_root, + owned_reference_scratch, + &mut state.external, + &mut state.seen, + resolution_cache, + &mut state.scratch, + &mut state.refers_metaschemas, + draft, + document_root_uri, + &mut state.deferred_refs, + local_seen, + )?; + } + + let child_end = owned_child_scratch.len(); + let mut idx = child_start; + while idx < child_end { + let child = owned_child_scratch[idx]; + idx += 1; + with_owned_child_path(path, &child, |child_path| { + explore_owned_subtree( + Arc::clone(¤t_base_uri), + document_root, + child.value, + child.draft, + false, + child_path, + document_root_uri, + document, + state, + known_resources, + resolution_cache, + owned_reference_scratch, + owned_child_scratch, + local_seen, + ) + })?; + } + + owned_reference_scratch.ref_ = None; + owned_reference_scratch.schema = None; + owned_child_scratch.truncate(child_start); + return Ok(()); + } + + owned_reference_scratch.ref_ = gate.ref_; + owned_reference_scratch.schema = gate.schema; + let subschema_ptr = std::ptr::from_ref::(subschema) as usize; + if state.visited_schemas.insert(subschema_ptr) { + collect_external_resources_from_slots( + ¤t_base_uri, + document_root, + owned_reference_scratch, + &mut state.external, + &mut state.seen, + resolution_cache, + &mut state.scratch, + &mut state.refers_metaschemas, + draft, + document_root_uri, + &mut state.deferred_refs, + local_seen, + )?; + } + owned_reference_scratch.ref_ = None; + owned_reference_scratch.schema = None; + return Ok(()); + } } if let Some(schema) = object { @@ -1589,6 +1712,8 @@ fn explore_owned_subtree<'a, 'r>( 
state, known_resources, resolution_cache, + owned_reference_scratch, + owned_child_scratch, local_seen, ) }) @@ -1957,6 +2082,122 @@ fn handle_retrieve_error( } } +fn with_owned_child_path( + path: &JsonPointerNode<'_, '_>, + child: &crate::specification::OwnedScratchChild<'_>, + f: impl FnOnce(&JsonPointerNode<'_, '_>) -> R, +) -> R { + use crate::specification::OwnedPathSegment; + + let first = match child.first { + OwnedPathSegment::Key(key) => path.push(key), + OwnedPathSegment::Index(index) => path.push(index), + }; + match child.second { + Some(OwnedPathSegment::Key(key)) => { + let second = first.push(key); + f(&second) + } + Some(OwnedPathSegment::Index(index)) => { + let second = first.push(index); + f(&second) + } + None => f(&first), + } +} + +fn collect_external_resources_from_slots<'doc>( + base: &Arc>, + root: &'doc Value, + references: &crate::specification::BorrowedReferenceSlots<'doc>, + collected: &mut AHashSet<(String, Uri, ReferenceKind)>, + seen: &mut ReferenceTracker, + resolution_cache: &mut UriCache, + scratch: &mut String, + refers_metaschemas: &mut bool, + draft: Draft, + doc_key: &Arc>, + deferred_refs: &mut Vec, + local_seen: &mut LocalSeen<'doc>, +) -> Result<(), Error> { + for (reference, key) in [(references.ref_, "$ref"), (references.schema, "$schema")] { + let Some(reference) = reference else { + continue; + }; + if reference.starts_with("https://json-schema.org/draft/") + || reference.starts_with("http://json-schema.org/draft-") + || base.as_str().starts_with("https://json-schema.org/draft/") + { + if key == "$ref" { + *refers_metaschemas = true; + } + continue; + } + if reference == "#" { + continue; + } + if reference.starts_with('#') { + crate::observe_registry!("registry.local_ref={}", reference); + if mark_local_reference(local_seen, base, reference) { + let ptr = reference.trim_start_matches('#'); + if let Some(referenced) = pointer(root, ptr) { + let target_draft = draft.detect(referenced); + let value_addr = 
std::ptr::from_ref::(referenced) as usize; + deferred_refs.push(( + Arc::clone(base), + Arc::clone(doc_key), + ptr.to_string(), + target_draft, + value_addr, + )); + } + } + continue; + } + if mark_reference(seen, base, reference) { + if key == "$schema" { + crate::observe_registry!("registry.schema_ref={}", reference); + } else { + crate::observe_registry!("registry.external_ref={}", reference); + } + let resolved = if base.has_fragment() { + let mut base_without_fragment = base.as_ref().clone(); + base_without_fragment.set_fragment(None); + + let (path, fragment) = match reference.split_once('#') { + Some((path, fragment)) => (path, Some(fragment)), + None => (reference, None), + }; + + let mut resolved = (*resolution_cache + .resolve_against(&base_without_fragment.borrow(), path)?) + .clone(); + if let Some(fragment) = fragment { + if let Some(encoded) = uri::EncodedString::new(fragment) { + resolved = resolved.with_fragment(Some(encoded)); + } else { + uri::encode_to(fragment, scratch); + resolved = + resolved.with_fragment(Some(uri::EncodedString::new_or_panic(scratch))); + scratch.clear(); + } + } + resolved + } else { + (*resolution_cache.resolve_against(&base.borrow(), reference)?).clone() + }; + + let kind = if key == "$schema" { + ReferenceKind::Schema + } else { + ReferenceKind::Ref + }; + collected.insert((reference.to_string(), resolved, kind)); + } + } + Ok(()) +} + fn validate_custom_metaschemas( custom_metaschemas: &[String], known_resources: &KnownResources, @@ -2007,8 +2248,7 @@ fn collect_external_resources<'doc>( } else if $reference != "#" { if $reference.starts_with('#') { crate::observe_registry!("registry.local_ref={}", $reference); - if draft == Draft::Draft4 || mark_local_reference(local_seen, base, $reference) - { + if mark_local_reference(local_seen, base, $reference) { let ptr = $reference.trim_start_matches('#'); if let Some(referenced) = pointer(root, ptr) { let target_draft = draft.detect(referenced); @@ -2505,6 +2745,8 @@ mod tests 
{ let mut state = ProcessingState::new(); let mut known_resources = KnownResources::default(); let mut resolution_cache = UriCache::new(); + let mut owned_reference_scratch = crate::specification::BorrowedReferenceSlots::default(); + let mut owned_child_scratch: Vec> = Vec::new(); let mut local_seen = LocalSeen::new(); known_resources.insert((*doc_key).clone()); @@ -2519,6 +2761,8 @@ mod tests { &mut state, &mut known_resources, &mut resolution_cache, + &mut owned_reference_scratch, + &mut owned_child_scratch, &mut local_seen, ) .expect("owned document traversal should succeed"); @@ -2546,6 +2790,8 @@ mod tests { let mut state = ProcessingState::new(); let mut known_resources = KnownResources::default(); let mut resolution_cache = UriCache::new(); + let mut owned_reference_scratch = crate::specification::BorrowedReferenceSlots::default(); + let mut owned_child_scratch: Vec> = Vec::new(); let mut local_seen = LocalSeen::new(); known_resources.insert((*doc_key).clone()); @@ -2560,6 +2806,8 @@ mod tests { &mut state, &mut known_resources, &mut resolution_cache, + &mut owned_reference_scratch, + &mut owned_child_scratch, &mut local_seen, ) .expect("owned fragment traversal should succeed"); diff --git a/crates/jsonschema-referencing/src/specification/draft201909.rs b/crates/jsonschema-referencing/src/specification/draft201909.rs index 52214ab5..d5fce72c 100644 --- a/crates/jsonschema-referencing/src/specification/draft201909.rs +++ b/crates/jsonschema-referencing/src/specification/draft201909.rs @@ -1,12 +1,66 @@ use serde_json::{Map, Value}; use crate::{ - specification::{BorrowedReferenceSlots, Draft}, + specification::{BorrowedReferenceSlots, Draft, OwnedObjectGate, OwnedScratchChild}, Error, JsonPointerNode, Resolver, ResourceRef, Segments, }; use super::subresources::{self, SubresourceIteratorInner}; +pub(crate) fn owned_object_gate_map(schema: &Map) -> OwnedObjectGate<'_> { + let mut id = None; + let mut has_anchor = false; + let mut ref_ = None; + let mut 
schema_ref = None; + let mut has_children = false; + + for (key, value) in schema { + match key.as_str() { + "$id" => id = value.as_str(), + "$anchor" => has_anchor |= value.as_str().is_some(), + "$ref" => ref_ = value.as_str(), + "$schema" => schema_ref = value.as_str(), + "additionalItems" + | "additionalProperties" + | "contains" + | "contentSchema" + | "else" + | "if" + | "not" + | "propertyNames" + | "then" + | "unevaluatedItems" + | "unevaluatedProperties" => has_children = true, + "allOf" | "anyOf" | "oneOf" => { + has_children |= value.as_array().is_some_and(|items| !items.is_empty()); + } + "$defs" | "definitions" | "dependentSchemas" | "patternProperties" | "properties" => { + has_children |= value.as_object().is_some_and(|items| !items.is_empty()); + } + "items" => { + has_children |= match value { + Value::Array(items) => !items.is_empty(), + _ => true, + }; + } + "dependencies" => { + has_children |= value + .as_object() + .is_some_and(|items| items.values().any(Value::is_object)); + } + _ => {} + } + } + + OwnedObjectGate { + id, + has_anchor, + ref_, + schema: schema_ref, + has_children, + } +} + pub(crate) fn scan_borrowed_object_into_scratch_map<'a>( schema: &'a Map, draft: Draft, @@ -75,6 +129,105 @@ pub(crate) fn scan_borrowed_object_into_scratch_map<'a>( } } +pub(crate) fn scan_owned_object_into_scratch_map<'a>( + schema: &'a Map, + draft: Draft, + references: &mut BorrowedReferenceSlots<'a>, + children: &mut Vec>, +) -> (Option<&'a str>, bool) { + let mut id = None; + let mut has_anchor = false; + + for (key, value) in schema { + match key.as_str() { + "$id" => id = value.as_str(), + "$anchor" => has_anchor |= value.as_str().is_some(), + "$ref" => { + if let Some(reference) = value.as_str() { + references.ref_ = Some(reference); + } + } + "$schema" => { + if let Some(reference) = value.as_str() { + references.schema = Some(reference); + } + } + "additionalItems" + | "additionalProperties" + | "contains" + | "contentSchema" + | "else" + | "if" + 
| "not" + | "propertyNames" + | "then" + | "unevaluatedItems" + | "unevaluatedProperties" => { + children.push(OwnedScratchChild::key( + key.as_str(), + value, + draft.detect(value), + )); + } + "allOf" | "anyOf" | "oneOf" => { + if let Some(arr) = value.as_array() { + for (index, item) in arr.iter().enumerate() { + children.push(OwnedScratchChild::key_index( + key.as_str(), + index, + item, + draft.detect(item), + )); + } + } + } + "$defs" | "definitions" | "dependentSchemas" | "patternProperties" | "properties" => { + if let Some(obj) = value.as_object() { + for (child_key, child_value) in obj { + children.push(OwnedScratchChild::key_key( + key.as_str(), + child_key.as_str(), + child_value, + draft.detect(child_value), + )); + } + } + } + "items" => match value { + Value::Array(arr) => { + for (index, item) in arr.iter().enumerate() { + children.push(OwnedScratchChild::key_index( + "items", + index, + item, + draft.detect(item), + )); + } + } + _ => children.push(OwnedScratchChild::key("items", value, draft.detect(value))), + }, + "dependencies" => { + if let Some(obj) = value.as_object() { + for (child_key, child_value) in obj { + if !child_value.is_object() { + continue; + } + children.push(OwnedScratchChild::key_key( + key.as_str(), + child_key.as_str(), + child_value, + draft.detect(child_value), + )); + } + } + } + _ => {} + } + } + + (id, has_anchor) +} + pub(crate) fn walk_borrowed_subresources_map<'a, E, F>( schema: &'a Map, draft: Draft, diff --git a/crates/jsonschema-referencing/src/specification/draft4.rs b/crates/jsonschema-referencing/src/specification/draft4.rs index c26d81de..02335220 100644 --- a/crates/jsonschema-referencing/src/specification/draft4.rs +++ b/crates/jsonschema-referencing/src/specification/draft4.rs @@ -1,12 +1,69 @@ use serde_json::{Map, Value}; use crate::{ - specification::{BorrowedReferenceSlots, Draft}, + specification::{BorrowedReferenceSlots, Draft, OwnedObjectGate, OwnedScratchChild}, Error, JsonPointerNode, Resolver, 
ResourceRef, Segments, }; use super::subresources::{self, SubresourceIteratorInner}; +pub(crate) fn owned_object_gate_map(schema: &Map) -> OwnedObjectGate<'_> { + let mut raw_id = None; + let mut ref_ = None; + let mut schema_ref = None; + let mut has_children = false; + + for (key, value) in schema { + match key.as_str() { + "id" => raw_id = value.as_str(), + "$ref" => ref_ = value.as_str(), + "$schema" => schema_ref = value.as_str(), + "additionalItems" | "additionalProperties" => has_children |= value.is_object(), + "contains" + | "contentSchema" + | "else" + | "if" + | "not" + | "propertyNames" + | "then" + | "unevaluatedItems" + | "unevaluatedProperties" => has_children = true, + "allOf" | "anyOf" | "oneOf" | "prefixItems" => { + has_children |= value.as_array().is_some_and(|items| !items.is_empty()); + } + "$defs" | "definitions" | "dependentSchemas" | "patternProperties" | "properties" => { + has_children |= value.as_object().is_some_and(|items| !items.is_empty()); + } + "items" => { + has_children |= match value { + Value::Array(items) => !items.is_empty(), + _ => true, + }; + } + "dependencies" => { + has_children |= value + .as_object() + .is_some_and(|items| items.values().any(Value::is_object)); + } + _ => {} + } + } + + let has_anchor = raw_id.is_some_and(|id| id.starts_with('#')); + let id = match raw_id { + Some(id) if !has_anchor && ref_.is_none() => Some(id), + _ => None, + }; + + OwnedObjectGate { + id, + has_anchor, + ref_, + schema: schema_ref, + has_children, + } +} + pub(crate) fn scan_borrowed_object_into_scratch_map<'a>( schema: &'a Map, draft: Draft, @@ -84,6 +141,115 @@ pub(crate) fn scan_borrowed_object_into_scratch_map<'a>( } } +pub(crate) fn scan_owned_object_into_scratch_map<'a>( + schema: &'a Map, + draft: Draft, + references: &mut BorrowedReferenceSlots<'a>, + children: &mut Vec>, +) -> (Option<&'a str>, bool) { + let mut raw_id = None; + let mut has_ref = false; + + for (key, value) in schema { + match key.as_str() { + "id" => 
raw_id = value.as_str(), + "$ref" => { + if let Some(reference) = value.as_str() { + has_ref = true; + references.ref_ = Some(reference); + } + } + "$schema" => { + if let Some(reference) = value.as_str() { + references.schema = Some(reference); + } + } + "additionalItems" | "additionalProperties" if value.is_object() => { + children.push(OwnedScratchChild::key( + key.as_str(), + value, + draft.detect(value), + )); + } + "contains" + | "contentSchema" + | "else" + | "if" + | "not" + | "propertyNames" + | "then" + | "unevaluatedItems" + | "unevaluatedProperties" => { + children.push(OwnedScratchChild::key( + key.as_str(), + value, + draft.detect(value), + )); + } + "allOf" | "anyOf" | "oneOf" | "prefixItems" => { + if let Some(arr) = value.as_array() { + for (index, item) in arr.iter().enumerate() { + children.push(OwnedScratchChild::key_index( + key.as_str(), + index, + item, + draft.detect(item), + )); + } + } + } + "$defs" | "definitions" | "dependentSchemas" | "patternProperties" | "properties" => { + if let Some(obj) = value.as_object() { + for (child_key, child_value) in obj { + children.push(OwnedScratchChild::key_key( + key.as_str(), + child_key.as_str(), + child_value, + draft.detect(child_value), + )); + } + } + } + "items" => match value { + Value::Array(arr) => { + for (index, item) in arr.iter().enumerate() { + children.push(OwnedScratchChild::key_index( + "items", + index, + item, + draft.detect(item), + )); + } + } + _ => children.push(OwnedScratchChild::key("items", value, draft.detect(value))), + }, + "dependencies" => { + if let Some(obj) = value.as_object() { + for (child_key, child_value) in obj { + if !child_value.is_object() { + continue; + } + children.push(OwnedScratchChild::key_key( + key.as_str(), + child_key.as_str(), + child_value, + draft.detect(child_value), + )); + } + } + } + _ => {} + } + } + + let has_anchor = raw_id.is_some_and(|id| id.starts_with('#')); + let id = match raw_id { + Some(id) if !has_anchor && !has_ref => Some(id), + 
_ => None, + }; + (id, has_anchor) +} + pub(crate) fn walk_borrowed_subresources_map<'a, E, F>( schema: &'a Map, draft: Draft, diff --git a/crates/jsonschema-referencing/src/specification/draft6.rs b/crates/jsonschema-referencing/src/specification/draft6.rs index 5c66e419..4f79ca12 100644 --- a/crates/jsonschema-referencing/src/specification/draft6.rs +++ b/crates/jsonschema-referencing/src/specification/draft6.rs @@ -1,12 +1,62 @@ use serde_json::{Map, Value}; use crate::{ - specification::{BorrowedReferenceSlots, Draft}, + specification::{BorrowedReferenceSlots, Draft, OwnedObjectGate, OwnedScratchChild}, Error, JsonPointerNode, Resolver, ResourceRef, Segments, }; use super::subresources::{self, SubresourceIteratorInner}; +pub(crate) fn owned_object_gate_map(schema: &Map) -> OwnedObjectGate<'_> { + let mut raw_id = None; + let mut ref_ = None; + let mut schema_ref = None; + let mut has_children = false; + + for (key, value) in schema { + match key.as_str() { + "$id" => raw_id = value.as_str(), + "$ref" => ref_ = value.as_str(), + "$schema" => schema_ref = value.as_str(), + "additionalItems" | "additionalProperties" | "contains" | "not" | "propertyNames" => { + has_children = true; + } + "allOf" | "anyOf" | "oneOf" => { + has_children |= value.as_array().is_some_and(|items| !items.is_empty()); + } + "definitions" | "patternProperties" | "properties" => { + has_children |= value.as_object().is_some_and(|items| !items.is_empty()); + } + "items" => { + has_children |= match value { + Value::Array(items) => !items.is_empty(), + _ => true, + }; + } + "dependencies" => { + has_children |= value + .as_object() + .is_some_and(|items| items.values().any(Value::is_object)); + } + _ => {} + } + } + + let has_anchor = raw_id.is_some_and(|id| id.starts_with('#')); + let id = match raw_id { + Some(id) if !has_anchor && ref_.is_none() => Some(id), + _ => None, + }; + + OwnedObjectGate { + id, + has_anchor, + ref_, + schema: schema_ref, + has_children, + } +} + pub(crate) fn 
scan_borrowed_object_into_scratch_map<'a>( schema: &'a Map, draft: Draft, @@ -65,6 +115,100 @@ pub(crate) fn scan_borrowed_object_into_scratch_map<'a>( } } +pub(crate) fn scan_owned_object_into_scratch_map<'a>( + schema: &'a Map, + draft: Draft, + references: &mut BorrowedReferenceSlots<'a>, + children: &mut Vec>, +) -> (Option<&'a str>, bool) { + let mut raw_id = None; + let mut has_ref = false; + + for (key, value) in schema { + match key.as_str() { + "$id" => raw_id = value.as_str(), + "$ref" => { + if let Some(reference) = value.as_str() { + has_ref = true; + references.ref_ = Some(reference); + } + } + "$schema" => { + if let Some(reference) = value.as_str() { + references.schema = Some(reference); + } + } + "additionalItems" | "additionalProperties" | "contains" | "not" | "propertyNames" => { + children.push(OwnedScratchChild::key( + key.as_str(), + value, + draft.detect(value), + )); + } + "allOf" | "anyOf" | "oneOf" => { + if let Some(arr) = value.as_array() { + for (index, item) in arr.iter().enumerate() { + children.push(OwnedScratchChild::key_index( + key.as_str(), + index, + item, + draft.detect(item), + )); + } + } + } + "definitions" | "patternProperties" | "properties" => { + if let Some(obj) = value.as_object() { + for (child_key, child_value) in obj { + children.push(OwnedScratchChild::key_key( + key.as_str(), + child_key.as_str(), + child_value, + draft.detect(child_value), + )); + } + } + } + "items" => match value { + Value::Array(arr) => { + for (index, item) in arr.iter().enumerate() { + children.push(OwnedScratchChild::key_index( + "items", + index, + item, + draft.detect(item), + )); + } + } + _ => children.push(OwnedScratchChild::key("items", value, draft.detect(value))), + }, + "dependencies" => { + if let Some(obj) = value.as_object() { + for (child_key, child_value) in obj { + if !child_value.is_object() { + continue; + } + children.push(OwnedScratchChild::key_key( + key.as_str(), + child_key.as_str(), + child_value, + 
draft.detect(child_value), + )); + } + } + } + _ => {} + } + } + + let has_anchor = raw_id.is_some_and(|id| id.starts_with('#')); + let id = match raw_id { + Some(id) if !has_anchor && !has_ref => Some(id), + _ => None, + }; + (id, has_anchor) +} + pub(crate) fn walk_borrowed_subresources_map<'a, E, F>( schema: &'a Map, draft: Draft, diff --git a/crates/jsonschema-referencing/src/specification/draft7.rs b/crates/jsonschema-referencing/src/specification/draft7.rs index fa5de71a..544d780d 100644 --- a/crates/jsonschema-referencing/src/specification/draft7.rs +++ b/crates/jsonschema-referencing/src/specification/draft7.rs @@ -1,12 +1,67 @@ use serde_json::{Map, Value}; use crate::{ - specification::{BorrowedReferenceSlots, Draft}, + specification::{BorrowedReferenceSlots, Draft, OwnedObjectGate, OwnedScratchChild}, Error, JsonPointerNode, Resolver, ResourceRef, Segments, }; use super::subresources::{self, SubresourceIteratorInner}; +pub(crate) fn owned_object_gate_map(schema: &Map) -> OwnedObjectGate<'_> { + let mut raw_id = None; + let mut ref_ = None; + let mut schema_ref = None; + let mut has_children = false; + + for (key, value) in schema { + match key.as_str() { + "$id" => raw_id = value.as_str(), + "$ref" => ref_ = value.as_str(), + "$schema" => schema_ref = value.as_str(), + "additionalItems" + | "additionalProperties" + | "contains" + | "else" + | "if" + | "not" + | "propertyNames" + | "then" => has_children = true, + "allOf" | "anyOf" | "oneOf" => { + has_children |= value.as_array().is_some_and(|items| !items.is_empty()); + } + "definitions" | "patternProperties" | "properties" => { + has_children |= value.as_object().is_some_and(|items| !items.is_empty()); + } + "items" => { + has_children |= match value { + Value::Array(items) => !items.is_empty(), + _ => true, + }; + } + "dependencies" => { + has_children |= value + .as_object() + .is_some_and(|items| items.values().any(Value::is_object)); + } + _ => {} + } + } + + let has_anchor = 
raw_id.is_some_and(|id| id.starts_with('#')); + let id = match raw_id { + Some(id) if !has_anchor && ref_.is_none() => Some(id), + _ => None, + }; + + OwnedObjectGate { + id, + has_anchor, + ref_, + schema: schema_ref, + has_children, + } +} + pub(crate) fn scan_borrowed_object_into_scratch_map<'a>( schema: &'a Map, draft: Draft, @@ -72,6 +127,107 @@ pub(crate) fn scan_borrowed_object_into_scratch_map<'a>( } } +pub(crate) fn scan_owned_object_into_scratch_map<'a>( + schema: &'a Map, + draft: Draft, + references: &mut BorrowedReferenceSlots<'a>, + children: &mut Vec>, +) -> (Option<&'a str>, bool) { + let mut raw_id = None; + let mut has_ref = false; + + for (key, value) in schema { + match key.as_str() { + "$id" => raw_id = value.as_str(), + "$ref" => { + if let Some(reference) = value.as_str() { + has_ref = true; + references.ref_ = Some(reference); + } + } + "$schema" => { + if let Some(reference) = value.as_str() { + references.schema = Some(reference); + } + } + "additionalItems" + | "additionalProperties" + | "contains" + | "else" + | "if" + | "not" + | "propertyNames" + | "then" => { + children.push(OwnedScratchChild::key( + key.as_str(), + value, + draft.detect(value), + )); + } + "allOf" | "anyOf" | "oneOf" => { + if let Some(arr) = value.as_array() { + for (index, item) in arr.iter().enumerate() { + children.push(OwnedScratchChild::key_index( + key.as_str(), + index, + item, + draft.detect(item), + )); + } + } + } + "definitions" | "patternProperties" | "properties" => { + if let Some(obj) = value.as_object() { + for (child_key, child_value) in obj { + children.push(OwnedScratchChild::key_key( + key.as_str(), + child_key.as_str(), + child_value, + draft.detect(child_value), + )); + } + } + } + "items" => match value { + Value::Array(arr) => { + for (index, item) in arr.iter().enumerate() { + children.push(OwnedScratchChild::key_index( + "items", + index, + item, + draft.detect(item), + )); + } + } + _ => children.push(OwnedScratchChild::key("items", value, 
draft.detect(value))), + }, + "dependencies" => { + if let Some(obj) = value.as_object() { + for (child_key, child_value) in obj { + if !child_value.is_object() { + continue; + } + children.push(OwnedScratchChild::key_key( + key.as_str(), + child_key.as_str(), + child_value, + draft.detect(child_value), + )); + } + } + } + _ => {} + } + } + + let has_anchor = raw_id.is_some_and(|id| id.starts_with('#')); + let id = match raw_id { + Some(id) if !has_anchor && !has_ref => Some(id), + _ => None, + }; + (id, has_anchor) +} + pub(crate) fn walk_borrowed_subresources_map<'a, E, F>( schema: &'a Map, draft: Draft, diff --git a/crates/jsonschema-referencing/src/specification/mod.rs b/crates/jsonschema-referencing/src/specification/mod.rs index be244262..59d068e8 100644 --- a/crates/jsonschema-referencing/src/specification/mod.rs +++ b/crates/jsonschema-referencing/src/specification/mod.rs @@ -20,6 +20,65 @@ pub(crate) struct BorrowedObjectProbe<'a> { pub(crate) has_ref_or_schema: bool, } +pub(crate) struct OwnedObjectGate<'a> { + pub(crate) id: Option<&'a str>, + pub(crate) has_anchor: bool, + pub(crate) ref_: Option<&'a str>, + pub(crate) schema: Option<&'a str>, + pub(crate) has_children: bool, +} + +#[derive(Copy, Clone)] +pub(crate) enum OwnedPathSegment<'a> { + Key(&'a str), + Index(usize), +} + +#[derive(Copy, Clone)] +pub(crate) struct OwnedScratchChild<'a> { + pub(crate) first: OwnedPathSegment<'a>, + pub(crate) second: Option>, + pub(crate) value: &'a Value, + pub(crate) draft: Draft, +} + +impl<'a> OwnedScratchChild<'a> { + #[inline] + pub(crate) fn key(key: &'a str, value: &'a Value, draft: Draft) -> Self { + Self { + first: OwnedPathSegment::Key(key), + second: None, + value, + draft, + } + } + + #[inline] + pub(crate) fn key_index(key: &'a str, index: usize, value: &'a Value, draft: Draft) -> Self { + Self { + first: OwnedPathSegment::Key(key), + second: Some(OwnedPathSegment::Index(index)), + value, + draft, + } + } + + #[inline] + pub(crate) fn key_key( + 
key: &'a str, + child_key: &'a str, + value: &'a Value, + draft: Draft, + ) -> Self { + Self { + first: OwnedPathSegment::Key(key), + second: Some(OwnedPathSegment::Key(child_key)), + value, + draft, + } + } +} + #[inline] pub(crate) fn has_ref_or_schema_object(schema: &Map) -> bool { if schema.len() <= 3 { @@ -119,19 +178,6 @@ impl Draft { } } - #[inline] - pub(crate) fn id_and_has_anchors_object( - self, - obj: &Map, - ) -> (Option<&str>, bool) { - match self { - Draft::Draft4 => id_and_has_legacy_id_object(obj), - Draft::Draft6 | Draft::Draft7 => id_and_has_legacy_dollar_id_object(obj), - Draft::Draft201909 => id_and_has_id_and_anchor_object(obj), - Draft::Draft202012 | Draft::Unknown => id_and_has_id_and_any_anchor_object(obj), - } - } - #[inline] pub(crate) fn probe_borrowed_object_map( self, @@ -181,10 +227,48 @@ impl Draft { Draft::Draft202012 | Draft::Unknown => { subresources::scan_borrowed_object_into_scratch_map( contents, self, references, children, + ); + } + } + } + pub(crate) fn scan_owned_object_into_scratch_map<'a>( + self, + contents: &'a Map, + references: &mut BorrowedReferenceSlots<'a>, + children: &mut Vec>, + ) -> (Option<&'a str>, bool) { + match self { + Draft::Draft4 => { + draft4::scan_owned_object_into_scratch_map(contents, self, references, children) + } + Draft::Draft6 => { + draft6::scan_owned_object_into_scratch_map(contents, self, references, children) + } + Draft::Draft7 => { + draft7::scan_owned_object_into_scratch_map(contents, self, references, children) + } + Draft::Draft201909 => draft201909::scan_owned_object_into_scratch_map( + contents, self, references, children, + ), + Draft::Draft202012 | Draft::Unknown => { + subresources::scan_owned_object_into_scratch_map( + contents, self, references, children, ) } } } + pub(crate) fn owned_object_gate_map( + self, + contents: &Map, + ) -> OwnedObjectGate<'_> { + match self { + Draft::Draft4 => draft4::owned_object_gate_map(contents), + Draft::Draft6 => 
draft6::owned_object_gate_map(contents), + Draft::Draft7 => draft7::owned_object_gate_map(contents), + Draft::Draft201909 => draft201909::owned_object_gate_map(contents), + Draft::Draft202012 | Draft::Unknown => subresources::owned_object_gate_map(contents), + } + } pub(crate) fn walk_borrowed_subresources_map<'a, E, F>( self, contents: &'a Map, @@ -344,20 +428,6 @@ impl Draft { } } -fn id_and_has_legacy_id_object(obj: &Map) -> (Option<&str>, bool) { - if obj.len() <= 3 { - scan_legacy_id_small(obj) - } else { - let raw_id = obj.get("id").and_then(Value::as_str); - let is_anchor = raw_id.is_some_and(|id| id.starts_with('#')); - let plain_id = match (is_anchor, obj.contains_key("$ref")) { - (false, false) => raw_id, - _ => None, - }; - (plain_id, is_anchor) - } -} - fn analyze_legacy_id_object(obj: &Map) -> BorrowedObjectProbe<'_> { if obj.len() <= 3 { return scan_legacy_id_probe_small(obj); @@ -379,26 +449,6 @@ fn analyze_legacy_id_object(obj: &Map) -> BorrowedObjectProbe<'_> } } -fn scan_legacy_id_small(obj: &Map) -> (Option<&str>, bool) { - let mut raw_id = None; - let mut has_ref = false; - - for (key, value) in obj { - match key.as_str() { - "id" => raw_id = value.as_str(), - "$ref" => has_ref = true, - _ => {} - } - } - - let is_anchor = raw_id.is_some_and(|id| id.starts_with('#')); - let plain_id = match (is_anchor, has_ref) { - (false, false) => raw_id, - _ => None, - }; - (plain_id, is_anchor) -} - fn scan_legacy_id_probe_small(obj: &Map) -> BorrowedObjectProbe<'_> { let mut raw_id = None; let mut has_ref = false; @@ -426,20 +476,6 @@ fn scan_legacy_id_probe_small(obj: &Map) -> BorrowedObjectProbe<' } } -fn id_and_has_legacy_dollar_id_object(obj: &Map) -> (Option<&str>, bool) { - if obj.len() <= 3 { - scan_legacy_dollar_id_small(obj) - } else { - let raw_id = obj.get("$id").and_then(Value::as_str); - let is_anchor = raw_id.is_some_and(|id| id.starts_with('#')); - let plain_id = match (is_anchor, obj.contains_key("$ref")) { - (false, false) => raw_id, - _ => 
None, - }; - (plain_id, is_anchor) - } -} - fn analyze_legacy_dollar_id_object(obj: &Map) -> BorrowedObjectProbe<'_> { if obj.len() <= 3 { return scan_legacy_dollar_id_probe_small(obj); @@ -461,26 +497,6 @@ fn analyze_legacy_dollar_id_object(obj: &Map) -> BorrowedObjectPr } } -fn scan_legacy_dollar_id_small(obj: &Map) -> (Option<&str>, bool) { - let mut raw_id = None; - let mut has_ref = false; - - for (key, value) in obj { - match key.as_str() { - "$id" => raw_id = value.as_str(), - "$ref" => has_ref = true, - _ => {} - } - } - - let is_anchor = raw_id.is_some_and(|id| id.starts_with('#')); - let plain_id = match (is_anchor, has_ref) { - (false, false) => raw_id, - _ => None, - }; - (plain_id, is_anchor) -} - fn scan_legacy_dollar_id_probe_small(obj: &Map) -> BorrowedObjectProbe<'_> { let mut raw_id = None; let mut has_ref = false; @@ -508,16 +524,6 @@ fn scan_legacy_dollar_id_probe_small(obj: &Map) -> BorrowedObject } } -fn id_and_has_id_and_anchor_object(obj: &Map) -> (Option<&str>, bool) { - if obj.len() <= 2 { - scan_id_and_anchor_small(obj) - } else { - let id = obj.get("$id").and_then(Value::as_str); - let has_anchor = obj.get("$anchor").and_then(Value::as_str).is_some(); - (id, has_anchor) - } -} - fn analyze_id_and_anchor_object(obj: &Map) -> BorrowedObjectProbe<'_> { if obj.len() <= 2 { return scan_id_and_anchor_probe_small(obj); @@ -530,21 +536,6 @@ fn analyze_id_and_anchor_object(obj: &Map) -> BorrowedObjectProbe } } -fn scan_id_and_anchor_small(obj: &Map) -> (Option<&str>, bool) { - let mut id = None; - let mut has_anchor = false; - - for (key, value) in obj { - match key.as_str() { - "$id" => id = value.as_str(), - "$anchor" => has_anchor |= value.as_str().is_some(), - _ => {} - } - } - - (id, has_anchor) -} - fn scan_id_and_anchor_probe_small(obj: &Map) -> BorrowedObjectProbe<'_> { let mut id = None; let mut has_anchor = false; @@ -566,17 +557,6 @@ fn scan_id_and_anchor_probe_small(obj: &Map) -> BorrowedObjectPro } } -fn 
id_and_has_id_and_any_anchor_object(obj: &Map) -> (Option<&str>, bool) { - if obj.len() <= 3 { - scan_id_and_any_anchor_small(obj) - } else { - let id = obj.get("$id").and_then(Value::as_str); - let has_anchor = obj.get("$anchor").and_then(Value::as_str).is_some() - || obj.get("$dynamicAnchor").and_then(Value::as_str).is_some(); - (id, has_anchor) - } -} - fn analyze_id_and_any_anchor_object(obj: &Map) -> BorrowedObjectProbe<'_> { if obj.len() <= 3 { return scan_id_and_any_anchor_probe_small(obj); @@ -590,21 +570,6 @@ fn analyze_id_and_any_anchor_object(obj: &Map) -> BorrowedObjectP } } -fn scan_id_and_any_anchor_small(obj: &Map) -> (Option<&str>, bool) { - let mut id = None; - let mut has_anchor = false; - - for (key, value) in obj { - match key.as_str() { - "$id" => id = value.as_str(), - "$anchor" | "$dynamicAnchor" => has_anchor |= value.as_str().is_some(), - _ => {} - } - } - - (id, has_anchor) -} - fn scan_id_and_any_anchor_probe_small(obj: &Map) -> BorrowedObjectProbe<'_> { let mut id = None; let mut has_anchor = false; diff --git a/crates/jsonschema-referencing/src/specification/subresources.rs b/crates/jsonschema-referencing/src/specification/subresources.rs index 28e2e4e5..fc8d3d20 100644 --- a/crates/jsonschema-referencing/src/specification/subresources.rs +++ b/crates/jsonschema-referencing/src/specification/subresources.rs @@ -5,7 +5,7 @@ use serde_json::{Map, Value}; use crate::{ segments::Segment, - specification::{BorrowedReferenceSlots, Draft}, + specification::{BorrowedReferenceSlots, Draft, OwnedObjectGate, OwnedScratchChild}, Error, JsonPointerNode, Resolver, ResourceRef, Segments, }; @@ -59,6 +59,120 @@ pub(crate) fn scan_borrowed_object_into_scratch_map<'a>( } } +pub(crate) fn owned_object_gate_map(schema: &Map) -> OwnedObjectGate<'_> { + let mut id = None; + let mut has_anchor = false; + let mut ref_ = None; + let mut schema_ref = None; + let mut has_children = false; + + for (key, value) in schema { + match key.as_str() { + "$id" => id = 
value.as_str(), + "$anchor" | "$dynamicAnchor" => has_anchor |= value.as_str().is_some(), + "$ref" => ref_ = value.as_str(), + "$schema" => schema_ref = value.as_str(), + "additionalProperties" + | "contains" + | "contentSchema" + | "else" + | "if" + | "items" + | "not" + | "propertyNames" + | "then" + | "unevaluatedItems" + | "unevaluatedProperties" => has_children = true, + "allOf" | "anyOf" | "oneOf" | "prefixItems" => { + has_children |= value.as_array().is_some_and(|items| !items.is_empty()); + } + "$defs" | "definitions" | "dependentSchemas" | "patternProperties" | "properties" => { + has_children |= value.as_object().is_some_and(|items| !items.is_empty()); + } + _ => {} + } + } + + OwnedObjectGate { + id, + has_anchor, + ref_, + schema: schema_ref, + has_children, + } +} + +pub(crate) fn scan_owned_object_into_scratch_map<'a>( + schema: &'a Map, + draft: Draft, + references: &mut BorrowedReferenceSlots<'a>, + children: &mut Vec>, +) -> (Option<&'a str>, bool) { + let mut id = None; + let mut has_anchor = false; + + for (key, value) in schema { + match key.as_str() { + "$id" => id = value.as_str(), + "$anchor" | "$dynamicAnchor" => has_anchor |= value.as_str().is_some(), + "$ref" => { + if let Some(reference) = value.as_str() { + references.ref_ = Some(reference); + } + } + "$schema" => { + if let Some(reference) = value.as_str() { + references.schema = Some(reference); + } + } + "additionalProperties" + | "contains" + | "contentSchema" + | "else" + | "if" + | "items" + | "not" + | "propertyNames" + | "then" + | "unevaluatedItems" + | "unevaluatedProperties" => { + children.push(OwnedScratchChild::key( + key.as_str(), + value, + draft.detect(value), + )); + } + "allOf" | "anyOf" | "oneOf" | "prefixItems" => { + if let Some(arr) = value.as_array() { + for (index, item) in arr.iter().enumerate() { + children.push(OwnedScratchChild::key_index( + key.as_str(), + index, + item, + draft.detect(item), + )); + } + } + } + "$defs" | "definitions" | "dependentSchemas" 
| "patternProperties" | "properties" => { + if let Some(obj) = value.as_object() { + for (child_key, child_value) in obj { + children.push(OwnedScratchChild::key_key( + key.as_str(), + child_key.as_str(), + child_value, + draft.detect(child_value), + )); + } + } + } + _ => {} + } + } + + (id, has_anchor) +} + pub(crate) fn walk_owned_subresources_map<'a, E, F>( schema: &'a Map, path: &JsonPointerNode<'_, '_>, diff --git a/profiler/Justfile b/profiler/Justfile index 22f26593..f4029fe7 100644 --- a/profiler/Justfile +++ b/profiler/Justfile @@ -84,6 +84,8 @@ fast-invalid-iter-errors: (flame "fast-invalid" "iter_errors" "10000") fast-invalid-evaluate: (flame "fast-invalid" "evaluate" "10000") registry: (flame "citm" "registry" "1000") +registry-owned: (flame "citm" "registry-owned" "1000") +registry-owned-specifications: (flame "openapi" "registry-owned-with-specifications" "1000") fhir-build: (flame "fhir" "build" "500") dhat-citm-build: (dhat "citm" "build" "10000") diff --git a/profiler/src/main.rs b/profiler/src/main.rs index e36606d1..6ba36444 100644 --- a/profiler/src/main.rs +++ b/profiler/src/main.rs @@ -1,5 +1,5 @@ use jsonschema::Registry; -use referencing::SPECIFICATIONS; +use referencing::{Draft, SPECIFICATIONS}; use serde_json::Value; use std::fs; @@ -11,6 +11,7 @@ struct Args { iterations: usize, schema_path: String, instance_path: Option, + draft: Option, method: String, } @@ -19,15 +20,43 @@ fn main() -> Result<(), Box> { // Handle presets let preset = pico_args.value_from_str::<_, String>("--preset").ok(); - let (schema_path, instance_path) = if let Some(preset) = preset { + let (schema_path, instance_path, draft) = if let Some(preset) = preset { match preset.as_str() { - "openapi" => ("../crates/benchmark/data/openapi.json".to_string(), Some("../crates/benchmark/data/zuora.json".to_string())), - "swagger" => ("../crates/benchmark/data/swagger.json".to_string(), Some("../crates/benchmark/data/kubernetes.json".to_string())), - "geojson" => 
("../crates/benchmark/data/geojson.json".to_string(), Some("../crates/benchmark/data/canada.json".to_string())), - "citm" => ("../crates/benchmark/data/citm_catalog_schema.json".to_string(), Some("../crates/benchmark/data/citm_catalog.json".to_string())), - "fast-valid" => ("../crates/benchmark/data/fast_schema.json".to_string(), Some("../crates/benchmark/data/fast_valid.json".to_string())), - "fast-invalid" => ("../crates/benchmark/data/fast_schema.json".to_string(), Some("../crates/benchmark/data/fast_invalid.json".to_string())), - "fhir" => ("../crates/benchmark/data/fhir.schema.json".to_string(), None), + "openapi" => ( + "../crates/benchmark/data/openapi.json".to_string(), + Some("../crates/benchmark/data/zuora.json".to_string()), + Some(Draft::Draft4), + ), + "swagger" => ( + "../crates/benchmark/data/swagger.json".to_string(), + Some("../crates/benchmark/data/kubernetes.json".to_string()), + Some(Draft::Draft4), + ), + "geojson" => ( + "../crates/benchmark/data/geojson.json".to_string(), + Some("../crates/benchmark/data/canada.json".to_string()), + Some(Draft::Draft4), + ), + "citm" => ( + "../crates/benchmark/data/citm_catalog_schema.json".to_string(), + Some("../crates/benchmark/data/citm_catalog.json".to_string()), + Some(Draft::Draft4), + ), + "fast-valid" => ( + "../crates/benchmark/data/fast_schema.json".to_string(), + Some("../crates/benchmark/data/fast_valid.json".to_string()), + Some(Draft::Draft7), + ), + "fast-invalid" => ( + "../crates/benchmark/data/fast_schema.json".to_string(), + Some("../crates/benchmark/data/fast_invalid.json".to_string()), + Some(Draft::Draft7), + ), + "fhir" => ( + "../crates/benchmark/data/fhir.schema.json".to_string(), + None, + None, + ), _ => return Err(format!("Unknown preset: {}. 
Available: openapi, swagger, geojson, citm, fast-valid, fast-invalid, fhir", preset).into()), } } else { @@ -35,13 +64,14 @@ fn main() -> Result<(), Box> { .value_from_str("--schema") .map_err(|_| "--schema is required when not using --preset")?; let instance_path = pico_args.value_from_str("--instance").ok(); - (schema_path, instance_path) + (schema_path, instance_path, None) }; let args = Args { iterations: pico_args.value_from_str("--iterations")?, schema_path, instance_path, + draft, method: pico_args.value_from_str("--method")?, }; @@ -53,6 +83,7 @@ fn main() -> Result<(), Box> { let schema_str = fs::read_to_string(&args.schema_path)?; let schema: Value = serde_json::from_str(&schema_str)?; + let draft = args.draft.unwrap_or_else(|| Draft::default().detect(&schema)); // To initialise metaschema validators let _ = &*SPECIFICATIONS; @@ -75,6 +106,26 @@ fn main() -> Result<(), Box> { .expect("Failed to build registry"); } } + "registry-owned" => { + for _ in 0..args.iterations { + let resource = draft.create_resource(schema.clone()); + let _ = Registry::new() + .add("http://example.com/schema", resource) + .expect("Invalid resource") + .prepare() + .expect("Failed to build registry"); + } + } + "registry-owned-with-specifications" => { + for _ in 0..args.iterations { + let resource = draft.create_resource(schema.clone()); + let _ = SPECIFICATIONS + .add("http://example.com/schema", resource) + .expect("Invalid resource") + .prepare() + .expect("Failed to build registry"); + } + } "is_valid" | "validate" | "iter_errors" | "evaluate" => { let instance_path = args .instance_path @@ -115,7 +166,7 @@ fn main() -> Result<(), Box> { } _ => { return Err( - "Invalid method. Use 'registry', 'build', 'is_valid', 'validate', 'iter_errors', or 'evaluate'" + "Invalid method. 
Use 'registry', 'registry-owned', 'registry-owned-with-specifications', 'build', 'is_valid', 'validate', 'iter_errors', or 'evaluate'" .into(), ); } From 6843ef498937e4f3e81c9c291eb09be54a54b519 Mon Sep 17 00:00:00 2001 From: Dmitry Dygalo Date: Tue, 7 Apr 2026 14:20:59 +0200 Subject: [PATCH 07/14] wip Signed-off-by: Dmitry Dygalo --- .../src/{anchors/mod.rs => anchor.rs} | 10 + .../src/anchors/keys.rs | 84 -- .../src/{specification/mod.rs => draft.rs} | 213 ++-- crates/jsonschema-referencing/src/lib.rs | 15 +- crates/jsonschema-referencing/src/pointer.rs | 140 +++ .../src/{registry.rs => registry/build.rs} | 999 ++---------------- .../src/registry/index.rs | 109 ++ .../src/registry/input.rs | 133 +++ .../src/registry/mod.rs | 543 ++++++++++ crates/jsonschema-referencing/src/resolver.rs | 11 +- .../{specification => spec}/draft201909.rs | 33 +- .../subresources.rs => spec/draft202012.rs} | 23 +- .../src/{specification => spec}/draft4.rs | 47 +- .../src/{specification => spec}/draft6.rs | 33 +- .../src/{specification => spec}/draft7.rs | 33 +- .../src/{specification => spec}/ids.rs | 0 crates/jsonschema-referencing/src/spec/mod.rs | 111 ++ crates/jsonschema/src/compiler.rs | 12 +- 18 files changed, 1289 insertions(+), 1260 deletions(-) rename crates/jsonschema-referencing/src/{anchors/mod.rs => anchor.rs} (96%) delete mode 100644 crates/jsonschema-referencing/src/anchors/keys.rs rename crates/jsonschema-referencing/src/{specification/mod.rs => draft.rs} (75%) create mode 100644 crates/jsonschema-referencing/src/pointer.rs rename crates/jsonschema-referencing/src/{registry.rs => registry/build.rs} (77%) create mode 100644 crates/jsonschema-referencing/src/registry/index.rs create mode 100644 crates/jsonschema-referencing/src/registry/input.rs create mode 100644 crates/jsonschema-referencing/src/registry/mod.rs rename crates/jsonschema-referencing/src/{specification => spec}/draft201909.rs (93%) rename crates/jsonschema-referencing/src/{specification/subresources.rs 
=> spec/draft202012.rs} (97%) rename crates/jsonschema-referencing/src/{specification => spec}/draft4.rs (92%) rename crates/jsonschema-referencing/src/{specification => spec}/draft6.rs (92%) rename crates/jsonschema-referencing/src/{specification => spec}/draft7.rs (93%) rename crates/jsonschema-referencing/src/{specification => spec}/ids.rs (100%) create mode 100644 crates/jsonschema-referencing/src/spec/mod.rs diff --git a/crates/jsonschema-referencing/src/anchors/mod.rs b/crates/jsonschema-referencing/src/anchor.rs similarity index 96% rename from crates/jsonschema-referencing/src/anchors/mod.rs rename to crates/jsonschema-referencing/src/anchor.rs index bc7b3248..5883f7a2 100644 --- a/crates/jsonschema-referencing/src/anchors/mod.rs +++ b/crates/jsonschema-referencing/src/anchor.rs @@ -1,3 +1,12 @@ +//! Anchors identify sub-schemas within a document by name. +//! +//! JSON Schema defines two anchor flavors: +//! - [`Anchor::Default`]: a plain anchor (`$anchor`), resolved against the current base URI. +//! - [`Anchor::Dynamic`]: a dynamic anchor (`$dynamicAnchor`), which re-anchors to the +//! outermost matching dynamic anchor found in the dynamic scope during resolution. +//! +//! [`AnchorIter`] avoids a heap allocation for the common case of 0–2 anchors per schema object. + use serde_json::Value; use crate::{Draft, Error, Resolved, Resolver, ResourceRef}; @@ -57,6 +66,7 @@ impl<'r> Anchor<'r> { } } +/// An iterator over 0, 1, or 2 anchors — avoids a [`Vec`] allocation for the common case. pub(crate) enum AnchorIter<'a> { Empty, One(Anchor<'a>), diff --git a/crates/jsonschema-referencing/src/anchors/keys.rs b/crates/jsonschema-referencing/src/anchors/keys.rs deleted file mode 100644 index 0588b639..00000000 --- a/crates/jsonschema-referencing/src/anchors/keys.rs +++ /dev/null @@ -1,84 +0,0 @@ -//! This module provides a mechanism for creating and managing composite keys -//! used in anchor lookups. It allows for efficient lookups without the need -//! 
to construct data structures with owned values. -//! -//! The key components are: -//! - `AnchorKey`: An owned version of the composite key. -//! - `AnchorKeyRef`: A borrowed version of the composite key. -//! - `BorrowDyn`: A trait that allows for dynamic borrowing of key components. -//! -//! This design enables the use of borrowed data in hash map lookups while -//! still storing owned data. -use std::{ - borrow::Borrow, - hash::{Hash, Hasher}, - sync::Arc, -}; - -use fluent_uri::Uri; - -use super::AnchorName; - -#[derive(Debug, Clone, Hash, PartialEq, Eq)] -pub(crate) struct AnchorKey { - uri: Arc>, - name: AnchorName, -} - -impl AnchorKey { - pub(crate) fn new(uri: Arc>, name: AnchorName) -> Self { - Self { uri, name } - } -} - -#[derive(Copy, Clone, Hash, PartialEq, Eq)] -pub(crate) struct AnchorKeyRef<'a> { - uri: &'a Uri, - name: &'a str, -} - -impl<'a> AnchorKeyRef<'a> { - pub(crate) fn new(uri: &'a Uri, name: &'a str) -> Self { - AnchorKeyRef { uri, name } - } - - pub(crate) fn borrow_dyn(&self) -> &dyn BorrowDyn { - self as &dyn BorrowDyn - } -} - -pub(crate) trait BorrowDyn { - fn borrowed_key(&self) -> AnchorKeyRef<'_>; -} - -impl BorrowDyn for AnchorKey { - fn borrowed_key(&self) -> AnchorKeyRef<'_> { - AnchorKeyRef::new(&self.uri, self.name.as_str()) - } -} - -impl BorrowDyn for AnchorKeyRef<'_> { - fn borrowed_key(&self) -> AnchorKeyRef<'_> { - *self - } -} - -impl<'a> Borrow for AnchorKey { - fn borrow(&self) -> &(dyn BorrowDyn + 'a) { - self - } -} - -impl Eq for dyn BorrowDyn + '_ {} - -impl PartialEq for dyn BorrowDyn + '_ { - fn eq(&self, other: &dyn BorrowDyn) -> bool { - self.borrowed_key().eq(&other.borrowed_key()) - } -} - -impl Hash for dyn BorrowDyn + '_ { - fn hash(&self, state: &mut H) { - self.borrowed_key().hash(state); - } -} diff --git a/crates/jsonschema-referencing/src/specification/mod.rs b/crates/jsonschema-referencing/src/draft.rs similarity index 75% rename from crates/jsonschema-referencing/src/specification/mod.rs rename to 
crates/jsonschema-referencing/src/draft.rs index 59d068e8..40f91884 100644 --- a/crates/jsonschema-referencing/src/specification/mod.rs +++ b/crates/jsonschema-referencing/src/draft.rs @@ -1,105 +1,15 @@ use serde_json::{Map, Value}; -use subresources::SubresourceIterator; - -mod draft201909; -mod draft4; -mod draft6; -mod draft7; -mod ids; -mod subresources; use crate::{ - anchors, + anchor, + spec::{ + self, draft201909, draft202012, draft4, draft6, draft7, has_ref_or_schema, ChildNode, + ObjectInfo, ObjectScan, ReferenceSlots, + }, vocabularies::{VocabularySet, DRAFT_2019_09_VOCABULARIES, DRAFT_2020_12_VOCABULARIES}, Anchor, Error, JsonPointerNode, Resolver, Resource, ResourceRef, Segments, }; -pub(crate) struct BorrowedObjectProbe<'a> { - pub(crate) id: Option<&'a str>, - pub(crate) has_anchor: bool, - pub(crate) has_ref_or_schema: bool, -} - -pub(crate) struct OwnedObjectGate<'a> { - pub(crate) id: Option<&'a str>, - pub(crate) has_anchor: bool, - pub(crate) ref_: Option<&'a str>, - pub(crate) schema: Option<&'a str>, - pub(crate) has_children: bool, -} - -#[derive(Copy, Clone)] -pub(crate) enum OwnedPathSegment<'a> { - Key(&'a str), - Index(usize), -} - -#[derive(Copy, Clone)] -pub(crate) struct OwnedScratchChild<'a> { - pub(crate) first: OwnedPathSegment<'a>, - pub(crate) second: Option>, - pub(crate) value: &'a Value, - pub(crate) draft: Draft, -} - -impl<'a> OwnedScratchChild<'a> { - #[inline] - pub(crate) fn key(key: &'a str, value: &'a Value, draft: Draft) -> Self { - Self { - first: OwnedPathSegment::Key(key), - second: None, - value, - draft, - } - } - - #[inline] - pub(crate) fn key_index(key: &'a str, index: usize, value: &'a Value, draft: Draft) -> Self { - Self { - first: OwnedPathSegment::Key(key), - second: Some(OwnedPathSegment::Index(index)), - value, - draft, - } - } - - #[inline] - pub(crate) fn key_key( - key: &'a str, - child_key: &'a str, - value: &'a Value, - draft: Draft, - ) -> Self { - Self { - first: OwnedPathSegment::Key(key), - 
second: Some(OwnedPathSegment::Key(child_key)), - value, - draft, - } - } -} - -#[inline] -pub(crate) fn has_ref_or_schema_object(schema: &Map) -> bool { - if schema.len() <= 3 { - for (key, value) in schema { - if (key == "$ref" || key == "$schema") && value.is_string() { - return true; - } - } - false - } else { - schema.get("$ref").and_then(Value::as_str).is_some() - || schema.get("$schema").and_then(Value::as_str).is_some() - } -} - -#[derive(Default)] -pub(crate) struct BorrowedReferenceSlots<'a> { - pub(crate) ref_: Option<&'a str>, - pub(crate) schema: Option<&'a str>, -} - /// JSON Schema specification versions. #[non_exhaustive] #[derive(Debug, Default, PartialEq, Copy, Clone, Hash, Eq, PartialOrd, Ord)] @@ -123,10 +33,12 @@ pub enum Draft { } impl Draft { + /// Wraps `contents` in a [`Resource`] tagged with this draft version. #[must_use] pub fn create_resource(self, contents: Value) -> Resource { Resource::new(contents, self) } + /// Wraps a reference to `contents` in a [`ResourceRef`] tagged with this draft version. #[must_use] pub fn create_resource_ref(self, contents: &Value) -> ResourceRef<'_> { ResourceRef::new(contents, self) @@ -156,6 +68,9 @@ impl Draft { } /// Detect what specification could be applied to the given contents. /// + /// Inspects the `$schema` field and returns the matching draft. If no `$schema` + /// field is present, returns `self` unchanged — the caller's current draft is preserved. + /// /// Returns `Draft::Unknown` for custom/unknown `$schema` values. /// Validation of custom meta-schemas happens during registry building. 
#[must_use] @@ -172,17 +87,16 @@ impl Draft { } pub(crate) fn id_of(self, contents: &Value) -> Option<&str> { match self { - Draft::Draft4 => ids::legacy_id(contents), - Draft::Draft6 | Draft::Draft7 => ids::legacy_dollar_id(contents), - Draft::Draft201909 | Draft::Draft202012 | Draft::Unknown => ids::dollar_id(contents), + Draft::Draft4 => spec::ids::legacy_id(contents), + Draft::Draft6 | Draft::Draft7 => spec::ids::legacy_dollar_id(contents), + Draft::Draft201909 | Draft::Draft202012 | Draft::Unknown => { + spec::ids::dollar_id(contents) + } } } #[inline] - pub(crate) fn probe_borrowed_object_map( - self, - obj: &Map, - ) -> BorrowedObjectProbe<'_> { + pub(crate) fn scan_object(self, obj: &Map) -> ObjectScan<'_> { match self { Draft::Draft4 => analyze_legacy_id_object(obj), Draft::Draft6 | Draft::Draft7 => analyze_legacy_dollar_id_object(obj), @@ -198,17 +112,17 @@ impl Draft { Draft::Draft6 => draft6::object_iter, Draft::Draft7 => draft7::object_iter, Draft::Draft201909 => draft201909::object_iter, - Draft::Draft202012 | Draft::Unknown => subresources::object_iter, + Draft::Draft202012 | Draft::Unknown => draft202012::object_iter, }; - SubresourceIterator::Object(schema.iter().flat_map(object_iter)) + draft202012::SubresourceIterator::Object(schema.iter().flat_map(object_iter)) } - None => SubresourceIterator::Empty, + None => draft202012::SubresourceIterator::Empty, } } pub(crate) fn scan_borrowed_object_into_scratch_map<'a>( self, contents: &'a Map, - references: &mut BorrowedReferenceSlots<'a>, + references: &mut ReferenceSlots<'a>, children: &mut Vec<(&'a Value, Draft)>, ) { match self { @@ -225,7 +139,7 @@ impl Draft { contents, self, references, children, ), Draft::Draft202012 | Draft::Unknown => { - subresources::scan_borrowed_object_into_scratch_map( + draft202012::scan_borrowed_object_into_scratch_map( contents, self, references, children, ); } @@ -234,8 +148,8 @@ impl Draft { pub(crate) fn scan_owned_object_into_scratch_map<'a>( self, contents: &'a 
Map, - references: &mut BorrowedReferenceSlots<'a>, - children: &mut Vec>, + references: &mut ReferenceSlots<'a>, + children: &mut Vec>, ) -> (Option<&'a str>, bool) { match self { Draft::Draft4 => { @@ -250,23 +164,18 @@ impl Draft { Draft::Draft201909 => draft201909::scan_owned_object_into_scratch_map( contents, self, references, children, ), - Draft::Draft202012 | Draft::Unknown => { - subresources::scan_owned_object_into_scratch_map( - contents, self, references, children, - ) - } + Draft::Draft202012 | Draft::Unknown => draft202012::scan_owned_object_into_scratch_map( + contents, self, references, children, + ), } } - pub(crate) fn owned_object_gate_map( - self, - contents: &Map, - ) -> OwnedObjectGate<'_> { + pub(crate) fn object_info(self, contents: &Map) -> ObjectInfo<'_> { match self { - Draft::Draft4 => draft4::owned_object_gate_map(contents), - Draft::Draft6 => draft6::owned_object_gate_map(contents), - Draft::Draft7 => draft7::owned_object_gate_map(contents), - Draft::Draft201909 => draft201909::owned_object_gate_map(contents), - Draft::Draft202012 | Draft::Unknown => subresources::owned_object_gate_map(contents), + Draft::Draft4 => draft4::object_info(contents), + Draft::Draft6 => draft6::object_info(contents), + Draft::Draft7 => draft7::object_info(contents), + Draft::Draft201909 => draft201909::object_info(contents), + Draft::Draft202012 | Draft::Unknown => draft202012::object_info(contents), } } pub(crate) fn walk_borrowed_subresources_map<'a, E, F>( @@ -283,7 +192,7 @@ impl Draft { Draft::Draft7 => draft7::walk_borrowed_subresources_map(contents, self, f), Draft::Draft201909 => draft201909::walk_borrowed_subresources_map(contents, self, f), Draft::Draft202012 | Draft::Unknown => { - subresources::walk_borrowed_subresources_map(contents, self, f) + draft202012::walk_borrowed_subresources_map(contents, self, f) } } } @@ -302,16 +211,16 @@ impl Draft { Draft::Draft7 => draft7::walk_owned_subresources_map(contents, path, self, f), Draft::Draft201909 => 
draft201909::walk_owned_subresources_map(contents, path, self, f), Draft::Draft202012 | Draft::Unknown => { - subresources::walk_owned_subresources_map(contents, path, self, f) + draft202012::walk_owned_subresources_map(contents, path, self, f) } } } pub(crate) fn anchors(self, contents: &Value) -> impl Iterator> { match self { - Draft::Draft4 => anchors::legacy_anchor_in_id(self, contents), - Draft::Draft6 | Draft::Draft7 => anchors::legacy_anchor_in_dollar_id(self, contents), - Draft::Draft201909 => anchors::anchor_2019(self, contents), - Draft::Draft202012 | Draft::Unknown => anchors::anchor(self, contents), + Draft::Draft4 => anchor::legacy_anchor_in_id(self, contents), + Draft::Draft6 | Draft::Draft7 => anchor::legacy_anchor_in_dollar_id(self, contents), + Draft::Draft201909 => anchor::anchor_2019(self, contents), + Draft::Draft202012 | Draft::Unknown => anchor::anchor(self, contents), } } pub(crate) fn maybe_in_subresource<'r>( @@ -328,7 +237,7 @@ impl Draft { draft201909::maybe_in_subresource(segments, resolver, subresource) } Draft::Draft202012 | Draft::Unknown => { - subresources::maybe_in_subresource(segments, resolver, subresource) + draft202012::maybe_in_subresource(segments, resolver, subresource) } } } @@ -428,9 +337,9 @@ impl Draft { } } -fn analyze_legacy_id_object(obj: &Map) -> BorrowedObjectProbe<'_> { +fn analyze_legacy_id_object(obj: &Map) -> ObjectScan<'_> { if obj.len() <= 3 { - return scan_legacy_id_probe_small(obj); + return scan_legacy_id_small(obj); } let raw_id = obj.get("id").and_then(Value::as_str); @@ -442,14 +351,14 @@ fn analyze_legacy_id_object(obj: &Map) -> BorrowedObjectProbe<'_> _ => None, }; - BorrowedObjectProbe { + ObjectScan { id, has_anchor, has_ref_or_schema, } } -fn scan_legacy_id_probe_small(obj: &Map) -> BorrowedObjectProbe<'_> { +fn scan_legacy_id_small(obj: &Map) -> ObjectScan<'_> { let mut raw_id = None; let mut has_ref = false; let mut has_schema = false; @@ -469,16 +378,16 @@ fn scan_legacy_id_probe_small(obj: &Map) 
-> BorrowedObjectProbe<' _ => None, }; - BorrowedObjectProbe { + ObjectScan { id, has_anchor, has_ref_or_schema: has_ref || has_schema, } } -fn analyze_legacy_dollar_id_object(obj: &Map) -> BorrowedObjectProbe<'_> { +fn analyze_legacy_dollar_id_object(obj: &Map) -> ObjectScan<'_> { if obj.len() <= 3 { - return scan_legacy_dollar_id_probe_small(obj); + return scan_legacy_dollar_id_small(obj); } let raw_id = obj.get("$id").and_then(Value::as_str); @@ -490,14 +399,14 @@ fn analyze_legacy_dollar_id_object(obj: &Map) -> BorrowedObjectPr _ => None, }; - BorrowedObjectProbe { + ObjectScan { id, has_anchor, has_ref_or_schema, } } -fn scan_legacy_dollar_id_probe_small(obj: &Map) -> BorrowedObjectProbe<'_> { +fn scan_legacy_dollar_id_small(obj: &Map) -> ObjectScan<'_> { let mut raw_id = None; let mut has_ref = false; let mut has_schema = false; @@ -517,26 +426,26 @@ fn scan_legacy_dollar_id_probe_small(obj: &Map) -> BorrowedObject _ => None, }; - BorrowedObjectProbe { + ObjectScan { id, has_anchor, has_ref_or_schema: has_ref || has_schema, } } -fn analyze_id_and_anchor_object(obj: &Map) -> BorrowedObjectProbe<'_> { +fn analyze_id_and_anchor_object(obj: &Map) -> ObjectScan<'_> { if obj.len() <= 2 { - return scan_id_and_anchor_probe_small(obj); + return scan_id_and_anchor_small(obj); } - BorrowedObjectProbe { + ObjectScan { id: obj.get("$id").and_then(Value::as_str), has_anchor: obj.get("$anchor").and_then(Value::as_str).is_some(), - has_ref_or_schema: has_ref_or_schema_object(obj), + has_ref_or_schema: has_ref_or_schema(obj), } } -fn scan_id_and_anchor_probe_small(obj: &Map) -> BorrowedObjectProbe<'_> { +fn scan_id_and_anchor_small(obj: &Map) -> ObjectScan<'_> { let mut id = None; let mut has_anchor = false; let mut has_ref_or_schema = false; @@ -550,27 +459,27 @@ fn scan_id_and_anchor_probe_small(obj: &Map) -> BorrowedObjectPro } } - BorrowedObjectProbe { + ObjectScan { id, has_anchor, has_ref_or_schema, } } -fn analyze_id_and_any_anchor_object(obj: &Map) -> 
BorrowedObjectProbe<'_> { +fn analyze_id_and_any_anchor_object(obj: &Map) -> ObjectScan<'_> { if obj.len() <= 3 { - return scan_id_and_any_anchor_probe_small(obj); + return scan_id_and_any_anchor_small(obj); } - BorrowedObjectProbe { + ObjectScan { id: obj.get("$id").and_then(Value::as_str), has_anchor: obj.get("$anchor").and_then(Value::as_str).is_some() || obj.get("$dynamicAnchor").and_then(Value::as_str).is_some(), - has_ref_or_schema: has_ref_or_schema_object(obj), + has_ref_or_schema: has_ref_or_schema(obj), } } -fn scan_id_and_any_anchor_probe_small(obj: &Map) -> BorrowedObjectProbe<'_> { +fn scan_id_and_any_anchor_small(obj: &Map) -> ObjectScan<'_> { let mut id = None; let mut has_anchor = false; let mut has_ref_or_schema = false; @@ -584,7 +493,7 @@ fn scan_id_and_any_anchor_probe_small(obj: &Map) -> BorrowedObjec } } - BorrowedObjectProbe { + ObjectScan { id, has_anchor, has_ref_or_schema, diff --git a/crates/jsonschema-referencing/src/lib.rs b/crates/jsonschema-referencing/src/lib.rs index a9c52361..8d1f975a 100644 --- a/crates/jsonschema-referencing/src/lib.rs +++ b/crates/jsonschema-referencing/src/lib.rs @@ -11,37 +11,38 @@ macro_rules! 
observe_registry { }}; } -mod anchors; +mod anchor; mod cache; +mod draft; mod error; mod list; pub mod meta; mod path; +mod pointer; mod registry; mod resolver; mod resource; mod retriever; mod segments; mod small_map; -mod specification; +mod spec; pub mod uri; mod vocabularies; -pub(crate) use anchors::Anchor; +pub(crate) use anchor::Anchor; +pub use draft::Draft; pub use error::{Error, UriError}; pub use fluent_uri::{Iri, IriRef, Uri, UriRef}; pub use list::List; #[doc(hidden)] pub use path::{write_escaped_str, write_index}; pub use path::{JsonPointerNode, JsonPointerSegment, OwnedJsonPointer}; -pub use registry::{ - parse_index, pointer, IntoRegistryResource, Registry, RegistryBuilder, SPECIFICATIONS, -}; +pub use pointer::{parse_index, pointer}; +pub use registry::{IntoRegistryResource, Registry, RegistryBuilder, SPECIFICATIONS}; pub use resolver::{Resolved, Resolver}; pub use resource::{unescape_segment, Resource, ResourceRef}; pub use retriever::{DefaultRetriever, Retrieve}; pub(crate) use segments::Segments; -pub use specification::Draft; pub use vocabularies::{Vocabulary, VocabularySet}; #[cfg(feature = "retrieve-async")] diff --git a/crates/jsonschema-referencing/src/pointer.rs b/crates/jsonschema-referencing/src/pointer.rs new file mode 100644 index 00000000..f46e83ca --- /dev/null +++ b/crates/jsonschema-referencing/src/pointer.rs @@ -0,0 +1,140 @@ +use serde_json::Value; + +use crate::{ + path::{JsonPointerNode, JsonPointerSegment}, + resource::unescape_segment, +}; + +#[derive(Debug, Clone, Default)] +pub(crate) struct ParsedPointer { + pub(crate) segments: Vec, +} + +impl ParsedPointer { + pub(crate) fn from_json_pointer(pointer: &str) -> Option { + if pointer.is_empty() { + return Some(Self::default()); + } + if !pointer.starts_with('/') { + return None; + } + + let mut segments = Vec::new(); + for token in pointer.split('/').skip(1).map(unescape_segment) { + if let Some(index) = parse_index(&token) { + 
segments.push(ParsedPointerSegment::Index(index)); + } else { + segments.push(ParsedPointerSegment::Key( + token.into_owned().into_boxed_str(), + )); + } + } + Some(Self { segments }) + } + + pub(crate) fn from_pointer_node(path: &JsonPointerNode<'_, '_>) -> Self { + let mut segments = Vec::new(); + let mut head = path; + + while let Some(parent) = head.parent() { + segments.push(match head.segment() { + JsonPointerSegment::Key(key) => ParsedPointerSegment::Key(key.as_ref().into()), + JsonPointerSegment::Index(idx) => ParsedPointerSegment::Index(*idx), + }); + head = parent; + } + + segments.reverse(); + Self { segments } + } + + pub(crate) fn lookup<'a>(&self, document: &'a Value) -> Option<&'a Value> { + self.segments + .iter() + .try_fold(document, |target, token| match token { + ParsedPointerSegment::Key(key) => match target { + Value::Object(map) => map.get(&**key), + _ => None, + }, + ParsedPointerSegment::Index(index) => match target { + Value::Array(list) => list.get(*index), + _ => None, + }, + }) + } +} + +#[derive(Debug, Clone)] +pub(crate) enum ParsedPointerSegment { + Key(Box), + Index(usize), +} + +/// Look up a value by a JSON Pointer. +/// +/// **NOTE**: A slightly faster version of pointer resolution based on `Value::pointer` from `serde_json`. +pub fn pointer<'a>(document: &'a Value, pointer: &str) -> Option<&'a Value> { + crate::observe_registry!( + "registry.pointer_segments={}", + bytecount::count(pointer.as_bytes(), b'/') + ); + if pointer.is_empty() { + return Some(document); + } + if !pointer.starts_with('/') { + return None; + } + pointer.split('/').skip(1).map(unescape_segment).try_fold( + document, + |target, token| match target { + Value::Object(map) => map.get(&*token), + Value::Array(list) => parse_index(&token).and_then(|x| list.get(x)), + _ => None, + }, + ) +} + +// Taken from `serde_json`. 
+#[must_use] +pub fn parse_index(s: &str) -> Option { + if s.starts_with('+') || (s.starts_with('0') && s.len() != 1) { + return None; + } + s.parse().ok() +} + +#[cfg(test)] +mod tests { + use serde_json::json; + + use crate::JsonPointerNode; + + use super::{pointer, ParsedPointer}; + + #[test] + fn test_empty_pointer() { + let document = json!({}); + assert_eq!(pointer(&document, ""), Some(&document)); + } + + #[test] + fn test_parsed_pointer_from_json_pointer_node_matches_pointer_lookup() { + let document = json!({ + "$defs": { + "foo/bar": [ + {"value": true} + ] + } + }); + let root = JsonPointerNode::new(); + let defs = root.push("$defs"); + let entry = defs.push("foo/bar"); + let node = entry.push(0); + + let parsed = ParsedPointer::from_pointer_node(&node); + assert_eq!( + parsed.lookup(&document), + pointer(&document, "/$defs/foo~1bar/0") + ); + } +} diff --git a/crates/jsonschema-referencing/src/registry.rs b/crates/jsonschema-referencing/src/registry/build.rs similarity index 77% rename from crates/jsonschema-referencing/src/registry.rs rename to crates/jsonschema-referencing/src/registry/build.rs index 4ceb8f49..30a6b1ed 100644 --- a/crates/jsonschema-referencing/src/registry.rs +++ b/crates/jsonschema-referencing/src/registry/build.rs @@ -1,35 +1,41 @@ -use std::{ - borrow::Cow, - collections::VecDeque, - fmt, - num::NonZeroUsize, - sync::{Arc, LazyLock}, -}; +//! BFS pipeline that processes pending resources into the prepared index. +//! +//! Entry points: +//! - [`process_resources_mixed`]: processes owned and borrowed resources together. +//! - [`build_prepared_index_for_documents`]: builds an index from pre-stored documents +//! (used by the static [`super::SPECIFICATIONS`] registry). +//! +//! [`StoredDocument`] wraps a [`Cow`](std::borrow::Cow) so the registry holds +//! both borrowed (externally-owned, zero-copy) and owned (retrieved) documents uniformly. 
+ +use std::{borrow::Cow, collections::VecDeque, num::NonZeroUsize, sync::Arc}; use ahash::{AHashMap, AHashSet}; use fluent_uri::{pct_enc::EStr, Uri}; use serde_json::Value; use crate::{ - cache::{SharedUriCache, UriCache}, - meta::{self, metas_for_draft}, - resource::unescape_segment, - small_map::SmallMap, - uri, - vocabularies::{self, VocabularySet}, - Anchor, DefaultRetriever, Draft, Error, JsonPointerNode, JsonPointerSegment, Resolver, - Resource, ResourceRef, Retrieve, + cache::UriCache, + meta::metas_for_draft, + pointer::{pointer, ParsedPointer, ParsedPointerSegment}, + uri, Anchor, Draft, Error, JsonPointerNode, ResourceRef, Retrieve, +}; + +use super::{ + index::{IndexedAnchor, IndexedAnchorKind, IndexedResource, PreparedIndex}, + input::PendingResource, }; +/// A schema document stored in the registry, either borrowed from the caller or owned. #[derive(Debug)] -struct StoredDocument<'a> { +pub(super) struct StoredDocument<'a> { value: Cow<'a, Value>, draft: Draft, } impl<'a> StoredDocument<'a> { #[inline] - fn owned(value: Value, draft: Draft) -> Self { + pub(super) fn owned(value: Value, draft: Draft) -> Self { Self { value: Cow::Owned(value), draft, @@ -37,7 +43,7 @@ impl<'a> StoredDocument<'a> { } #[inline] - fn borrowed(value: &'a Value, draft: Draft) -> Self { + pub(super) fn borrowed(value: &'a Value, draft: Draft) -> Self { Self { value: Cow::Borrowed(value), draft, @@ -45,7 +51,7 @@ impl<'a> StoredDocument<'a> { } #[inline] - fn contents(&self) -> &Value { + pub(super) fn contents(&self) -> &Value { &self.value } @@ -58,779 +64,15 @@ impl<'a> StoredDocument<'a> { } #[inline] - fn draft(&self) -> Draft { + pub(super) fn draft(&self) -> Draft { self.draft } } -type DocumentStore<'a> = AHashMap>, Arc>>; -type AnchorKey = Box; - -#[derive(Debug, Clone, Default)] -struct PreparedIndex<'a> { - resources: SmallMap>, IndexedResource<'a>>, - anchors: SmallMap>, SmallMap>>, -} - -#[derive(Debug, Clone)] -enum IndexedResource<'a> { - 
Borrowed(ResourceRef<'a>), - Owned { - document: Arc>, - pointer: ParsedPointer, - draft: Draft, - }, -} - -impl IndexedResource<'_> { - #[inline] - fn resolve(&self) -> Option> { - match self { - IndexedResource::Borrowed(resource) => { - Some(ResourceRef::new(resource.contents(), resource.draft())) - } - IndexedResource::Owned { - document, - pointer, - draft, - } => { - let contents = pointer.lookup(document.contents())?; - Some(ResourceRef::new(contents, *draft)) - } - } - } -} - -type BorrowedAnchor<'a> = Anchor<'a>; - -#[derive(Debug, Clone)] -enum IndexedAnchor<'a> { - Borrowed(BorrowedAnchor<'a>), - Owned { - document: Arc>, - pointer: ParsedPointer, - draft: Draft, - kind: IndexedAnchorKind, - name: Box, - }, -} - -impl IndexedAnchor<'_> { - #[inline] - fn resolve(&self) -> Option> { - match self { - IndexedAnchor::Borrowed(anchor) => Some(match anchor { - Anchor::Default { name, resource } => Anchor::Default { - name, - resource: ResourceRef::new(resource.contents(), resource.draft()), - }, - Anchor::Dynamic { name, resource } => Anchor::Dynamic { - name, - resource: ResourceRef::new(resource.contents(), resource.draft()), - }, - }), - IndexedAnchor::Owned { - document, - pointer, - draft, - kind, - name, - } => { - let contents = pointer.lookup(document.contents())?; - let resource = ResourceRef::new(contents, *draft); - Some(match kind { - IndexedAnchorKind::Default => Anchor::Default { name, resource }, - IndexedAnchorKind::Dynamic => Anchor::Dynamic { name, resource }, - }) - } - } - } -} - -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -enum IndexedAnchorKind { - Default, - Dynamic, -} - -#[derive(Debug, Clone, Default)] -struct ParsedPointer { - segments: Vec, -} - -impl ParsedPointer { - fn from_json_pointer(pointer: &str) -> Option { - if pointer.is_empty() { - return Some(Self::default()); - } - if !pointer.starts_with('/') { - return None; - } - - let mut segments = Vec::new(); - for token in pointer.split('/').skip(1).map(unescape_segment) { - 
if let Some(index) = parse_index(&token) { - segments.push(ParsedPointerSegment::Index(index)); - } else { - segments.push(ParsedPointerSegment::Key( - token.into_owned().into_boxed_str(), - )); - } - } - Some(Self { segments }) - } - - fn from_pointer_node(path: &JsonPointerNode<'_, '_>) -> Self { - let mut segments = Vec::new(); - let mut head = path; - - while let Some(parent) = head.parent() { - segments.push(match head.segment() { - JsonPointerSegment::Key(key) => ParsedPointerSegment::Key(key.as_ref().into()), - JsonPointerSegment::Index(idx) => ParsedPointerSegment::Index(*idx), - }); - head = parent; - } - - segments.reverse(); - Self { segments } - } - - fn lookup<'a>(&self, document: &'a Value) -> Option<&'a Value> { - self.segments - .iter() - .try_fold(document, |target, token| match token { - ParsedPointerSegment::Key(key) => match target { - Value::Object(map) => map.get(&**key), - _ => None, - }, - ParsedPointerSegment::Index(index) => match target { - Value::Array(list) => list.get(*index), - _ => None, - }, - }) - } -} - -#[derive(Debug, Clone)] -enum ParsedPointerSegment { - Key(Box), - Index(usize), -} - -/// Pre-loaded registry containing all JSON Schema meta-schemas and their vocabularies -pub static SPECIFICATIONS: LazyLock> = - LazyLock::new(|| Registry::build_from_meta_schemas(meta::META_SCHEMAS_ALL.as_slice())); - -/// A registry of JSON Schema resources, each identified by their canonical URIs. -/// -/// `Registry` is a prepared registry: add resources with [`Registry::new`] and -/// [`RegistryBuilder::add`], then call [`RegistryBuilder::prepare`] to build the -/// reusable registry. 
To resolve `$ref` references directly, create a [`Resolver`] -/// from the prepared registry: -/// -/// ```rust -/// use referencing::Registry; -/// -/// # fn main() -> Result<(), Box> { -/// let schema = serde_json::json!({ -/// "$schema": "https://json-schema.org/draft/2020-12/schema", -/// "$id": "https://example.com/root", -/// "$defs": { "item": { "type": "string" } }, -/// "items": { "$ref": "#/$defs/item" } -/// }); -/// -/// let registry = Registry::new() -/// .add("https://example.com/root", schema)? -/// .prepare()?; -/// -/// let resolver = registry.resolver(referencing::uri::from_str("https://example.com/root")?); -/// # Ok(()) -/// # } -/// ``` -#[derive(Debug, Clone)] -pub struct Registry<'a> { - baseline: Option<&'a Registry<'a>>, - resolution_cache: SharedUriCache, - known_resources: KnownResources, - index_data: PreparedIndex<'a>, -} - -#[derive(Clone)] -pub struct RegistryBuilder<'a> { - baseline: Option<&'a Registry<'a>>, - pending: AHashMap, PendingResource<'a>>, - retriever: Arc, - #[cfg(feature = "retrieve-async")] - async_retriever: Option>, - draft: Option, -} - -impl fmt::Debug for RegistryBuilder<'_> { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.debug_struct("RegistryBuilder") - .field("has_baseline", &self.baseline.is_some()) - .field("pending_len", &self.pending.len()) - .field("draft", &self.draft) - .finish() - } -} - -#[derive(Clone)] -pub(crate) enum PendingResource<'a> { - OwnedValue(Value), - BorrowedValue(&'a Value), - OwnedResource(Resource), - BorrowedResource(ResourceRef<'a>), -} - -pub(crate) mod private { - use ahash::AHashMap; - use fluent_uri::Uri; - - use super::PendingResource; - - pub(crate) trait Sealed<'a> { - fn insert_into( - self, - pending: &mut AHashMap, PendingResource<'a>>, - uri: Uri, - ); - } -} - -#[allow(private_bounds)] -pub trait IntoRegistryResource<'a>: private::Sealed<'a> {} - -impl<'a, T> IntoRegistryResource<'a> for T where T: private::Sealed<'a> {} - -impl<'a> private::Sealed<'a> 
for Resource { - fn insert_into( - self, - pending: &mut AHashMap, PendingResource<'a>>, - uri: Uri, - ) { - pending.insert(uri, PendingResource::OwnedResource(self)); - } -} - -impl<'a> private::Sealed<'a> for &'a Resource { - fn insert_into( - self, - pending: &mut AHashMap, PendingResource<'a>>, - uri: Uri, - ) { - pending.insert( - uri, - PendingResource::BorrowedResource(ResourceRef::new(self.contents(), self.draft())), - ); - } -} - -impl<'a> private::Sealed<'a> for &'a Value { - fn insert_into( - self, - pending: &mut AHashMap, PendingResource<'a>>, - uri: Uri, - ) { - pending.insert(uri, PendingResource::BorrowedValue(self)); - } -} - -impl<'a> private::Sealed<'a> for ResourceRef<'a> { - fn insert_into( - self, - pending: &mut AHashMap, PendingResource<'a>>, - uri: Uri, - ) { - pending.insert(uri, PendingResource::BorrowedResource(self)); - } -} - -impl<'a> private::Sealed<'a> for Value { - fn insert_into( - self, - pending: &mut AHashMap, PendingResource<'a>>, - uri: Uri, - ) { - pending.insert(uri, PendingResource::OwnedValue(self)); - } -} - -impl<'a> RegistryBuilder<'a> { - fn new() -> Self { - Self { - baseline: None, - pending: AHashMap::new(), - retriever: Arc::new(DefaultRetriever), - #[cfg(feature = "retrieve-async")] - async_retriever: None, - draft: None, - } - } - - fn from_registry(registry: &'a Registry<'a>) -> Self { - Self { - baseline: Some(registry), - pending: AHashMap::new(), - retriever: Arc::new(DefaultRetriever), - #[cfg(feature = "retrieve-async")] - async_retriever: None, - draft: None, - } - } - - #[must_use] - pub fn draft(mut self, draft: Draft) -> Self { - self.draft = Some(draft); - self - } - - #[must_use] - pub fn retriever(mut self, retriever: impl IntoRetriever) -> Self { - self.retriever = retriever.into_retriever(); - self - } - - #[cfg(feature = "retrieve-async")] - #[must_use] - pub fn async_retriever(mut self, retriever: impl IntoAsyncRetriever) -> Self { - self.async_retriever = Some(retriever.into_retriever()); - 
self - } - - /// Add a resource to the registry builder. - /// - /// # Errors - /// - /// Returns an error if the URI is invalid. - pub fn add<'b>( - self, - uri: impl AsRef, - resource: impl IntoRegistryResource<'b>, - ) -> Result, Error> - where - 'a: 'b, - { - let parsed = uri::from_str(uri.as_ref().trim_end_matches('#'))?; - let mut pending: AHashMap, PendingResource<'b>> = - self.pending.into_iter().collect(); - private::Sealed::insert_into(resource, &mut pending, parsed); - Ok(RegistryBuilder { - baseline: self.baseline, - pending, - retriever: self.retriever, - #[cfg(feature = "retrieve-async")] - async_retriever: self.async_retriever, - draft: self.draft, - }) - } - - /// Add multiple resources to the registry builder. - /// - /// # Errors - /// - /// Returns an error if any URI is invalid. - pub fn extend<'b, I, U, T>(self, pairs: I) -> Result, Error> - where - 'a: 'b, - I: IntoIterator, - U: AsRef, - T: IntoRegistryResource<'b>, - { - let mut builder = RegistryBuilder { - baseline: self.baseline, - pending: self.pending.into_iter().collect(), - retriever: self.retriever, - #[cfg(feature = "retrieve-async")] - async_retriever: self.async_retriever, - draft: self.draft, - }; - for (uri, resource) in pairs { - builder = builder.add(uri, resource)?; - } - Ok(builder) - } - - /// Prepare the registry for reuse. - /// - /// # Errors - /// - /// Returns an error if URI processing, retrieval, or custom meta-schema validation fails. - pub fn prepare(self) -> Result, Error> { - if let Some(baseline) = self.baseline { - baseline.try_with_pending_resources_and_retriever( - self.pending, - &*self.retriever, - self.draft, - ) - } else { - Registry::try_from_pending_resources_impl(self.pending, &*self.retriever, self.draft) - } - } - - #[cfg(feature = "retrieve-async")] - /// Prepare the registry for reuse with async retrieval. - /// - /// # Errors - /// - /// Returns an error if URI processing, retrieval, or custom meta-schema validation fails. 
- pub async fn async_prepare(self) -> Result, Error> { - let retriever = self - .async_retriever - .unwrap_or_else(|| Arc::new(DefaultRetriever)); - if let Some(baseline) = self.baseline { - baseline - .try_with_pending_resources_and_retriever_async( - self.pending, - &*retriever, - self.draft, - ) - .await - } else { - Registry::try_from_pending_resources_async_impl(self.pending, &*retriever, self.draft) - .await - } - } -} - -impl<'a> Registry<'a> { - /// Add a resource to a prepared registry, returning a builder that must be prepared again. - /// - /// # Errors - /// - /// Returns an error if the URI is invalid. - pub fn add<'b>( - &'b self, - uri: impl AsRef, - resource: impl IntoRegistryResource<'b>, - ) -> Result, Error> - where - 'a: 'b, - { - RegistryBuilder::from_registry(self).add(uri, resource) - } - - /// Add multiple resources to a prepared registry, returning a builder that - /// must be prepared again. - /// - /// # Errors - /// - /// Returns an error if any URI is invalid. 
- pub fn extend<'b, I, U, T>(&'b self, pairs: I) -> Result, Error> - where - 'a: 'b, - I: IntoIterator, - U: AsRef, - T: IntoRegistryResource<'b>, - { - RegistryBuilder::from_registry(self).extend(pairs) - } -} - -pub trait IntoRetriever { - fn into_retriever(self) -> Arc; -} - -impl IntoRetriever for T { - fn into_retriever(self) -> Arc { - Arc::new(self) - } -} - -impl IntoRetriever for Arc { - fn into_retriever(self) -> Arc { - self - } -} - -#[cfg(feature = "retrieve-async")] -pub trait IntoAsyncRetriever { - fn into_retriever(self) -> Arc; -} - -#[cfg(feature = "retrieve-async")] -impl IntoAsyncRetriever for T { - fn into_retriever(self) -> Arc { - Arc::new(self) - } -} - -#[cfg(feature = "retrieve-async")] -impl IntoAsyncRetriever for Arc { - fn into_retriever(self) -> Arc { - self - } -} - -impl Registry<'static> { - #[allow(clippy::new_ret_no_self)] - #[must_use] - pub fn new<'a>() -> RegistryBuilder<'a> { - RegistryBuilder::new() - } - - fn try_from_pending_resources_impl<'a>( - pairs: impl IntoIterator, PendingResource<'a>)>, - retriever: &dyn Retrieve, - draft: Option, - ) -> Result, Error> { - let mut documents = DocumentStore::new(); - let mut known_resources = KnownResources::new(); - let mut resolution_cache = UriCache::new(); - - let (custom_metaschemas, index_data) = process_resources_mixed( - pairs, - retriever, - &mut documents, - &mut known_resources, - &mut resolution_cache, - draft, - )?; - - validate_custom_metaschemas(&custom_metaschemas, &known_resources)?; - - Ok(Registry { - baseline: None, - resolution_cache: resolution_cache.into_shared(), - known_resources, - index_data, - }) - } - - #[cfg(feature = "retrieve-async")] - async fn try_from_pending_resources_async_impl<'a>( - pairs: impl IntoIterator, PendingResource<'a>)>, - retriever: &dyn crate::AsyncRetrieve, - draft: Option, - ) -> Result, Error> { - let mut documents = DocumentStore::new(); - let mut known_resources = KnownResources::new(); - let mut resolution_cache = 
UriCache::new(); - - let (custom_metaschemas, index_data) = process_resources_async_mixed( - pairs, - retriever, - &mut documents, - &mut known_resources, - &mut resolution_cache, - draft, - ) - .await?; - - validate_custom_metaschemas(&custom_metaschemas, &known_resources)?; - - Ok(Registry { - baseline: None, - resolution_cache: resolution_cache.into_shared(), - known_resources, - index_data, - }) - } - - /// Build a registry with all the given meta-schemas from specs. - pub(crate) fn build_from_meta_schemas(schemas: &[(&'static str, &'static Value)]) -> Self { - let mut documents = DocumentStore::with_capacity(schemas.len()); - let mut known_resources = KnownResources::with_capacity(schemas.len()); - - for (uri, schema) in schemas { - let parsed = - uri::from_str(uri.trim_end_matches('#')).expect("meta-schema URI must be valid"); - let key = Arc::new(parsed); - let draft = Draft::default().detect(schema); - known_resources.insert((*key).clone()); - documents.insert(key, Arc::new(StoredDocument::borrowed(schema, draft))); - } - - let mut resolution_cache = UriCache::with_capacity(35); - let index_data = build_prepared_index_for_documents(&documents, &mut resolution_cache) - .expect("meta-schema index data must build"); - - Self { - baseline: None, - resolution_cache: resolution_cache.into_shared(), - known_resources, - index_data, - } - } -} - -impl<'a> Registry<'a> { - fn try_with_pending_resources_and_retriever( - &'a self, - pairs: impl IntoIterator, PendingResource<'a>)>, - retriever: &dyn Retrieve, - draft: Option, - ) -> Result, Error> { - let mut documents = DocumentStore::new(); - let mut resolution_cache = UriCache::new(); - let mut known_resources = self.known_resources.clone(); - - let (custom_metaschemas, index_data) = process_resources_mixed( - pairs, - retriever, - &mut documents, - &mut known_resources, - &mut resolution_cache, - draft, - )?; - validate_custom_metaschemas(&custom_metaschemas, &known_resources)?; - - Ok(Registry { - baseline: 
Some(self), - resolution_cache: resolution_cache.into_shared(), - known_resources, - index_data, - }) - } - - #[cfg(feature = "retrieve-async")] - async fn try_with_pending_resources_and_retriever_async( - &'a self, - pairs: impl IntoIterator, PendingResource<'a>)>, - retriever: &dyn crate::AsyncRetrieve, - draft: Option, - ) -> Result, Error> { - let mut documents = DocumentStore::new(); - let mut resolution_cache = UriCache::new(); - let mut known_resources = self.known_resources.clone(); - - let (custom_metaschemas, index_data) = process_resources_async_mixed( - pairs, - retriever, - &mut documents, - &mut known_resources, - &mut resolution_cache, - draft, - ) - .await?; - validate_custom_metaschemas(&custom_metaschemas, &known_resources)?; - - Ok(Registry { - baseline: Some(self), - resolution_cache: resolution_cache.into_shared(), - known_resources, - index_data, - }) - } - - /// Resolves a reference URI against a base URI using registry's cache. - /// - /// # Errors - /// - /// Returns an error if base has not schema or there is a fragment. 
- pub fn resolve_against(&self, base: &Uri<&str>, uri: &str) -> Result>, Error> { - self.resolution_cache.resolve_against(base, uri) - } - - #[must_use] - pub fn contains_resource_uri(&self, uri: &str) -> bool { - let Ok(uri) = uri::from_str(uri) else { - return false; - }; - self.resource_by_uri(&uri).is_some() - } - - #[must_use] - pub fn contains_anchor(&self, uri: &str, name: &str) -> bool { - let Ok(uri) = uri::from_str(uri) else { - return false; - }; - self.contains_anchor_uri(&uri, name) - } - - #[must_use] - pub fn resolver(&self, base_uri: Uri) -> Resolver<'_> { - Resolver::new(self, Arc::new(base_uri)) - } - - #[must_use] - pub fn find_vocabularies(&self, draft: Draft, contents: &Value) -> VocabularySet { - match draft.detect(contents) { - Draft::Unknown => { - if let Some(specification) = contents - .as_object() - .and_then(|obj| obj.get("$schema")) - .and_then(|s| s.as_str()) - { - if let Ok(mut uri) = uri::from_str(specification) { - uri.set_fragment(None); - if let Some(resource) = self.resource_by_uri(&uri) { - if let Ok(Some(vocabularies)) = vocabularies::find(resource.contents()) - { - return vocabularies; - } - } - } - } - Draft::Unknown.default_vocabularies() - } - draft => draft.default_vocabularies(), - } - } - - #[inline] - pub(crate) fn resource_by_uri(&self, uri: &Uri) -> Option> { - self.index_data - .resources - .get(uri) - .and_then(IndexedResource::resolve) - .or_else(|| { - self.baseline - .and_then(|baseline| baseline.resource_by_uri(uri)) - }) - } - - pub(crate) fn contains_anchor_uri(&self, uri: &Uri, name: &str) -> bool { - self.index_data - .anchors - .get(uri) - .is_some_and(|entries| entries.contains_key(name)) - || self - .baseline - .is_some_and(|baseline| baseline.contains_anchor_uri(uri, name)) - } - - fn local_anchor_by_uri(&self, uri: &Uri, name: &str) -> Option> { - self.index_data - .anchors - .get(uri) - .and_then(|entries| entries.get(name)) - .and_then(IndexedAnchor::resolve) - } - - fn anchor_exact(&self, uri: &Uri, 
name: &str) -> Option> { - self.local_anchor_by_uri(uri, name).or_else(|| { - self.baseline - .and_then(|baseline| baseline.anchor_exact(uri, name)) - }) - } - - pub(crate) fn anchor(&self, uri: &Uri, name: &str) -> Result, Error> { - if let Some(anchor) = self.anchor_exact(uri, name) { - return Ok(anchor); - } - - if let Some(resource) = self.resource_by_uri(uri) { - if let Some(id) = resource.id() { - let canonical = uri::from_str(id)?; - if let Some(anchor) = self.anchor_exact(&canonical, name) { - return Ok(anchor); - } - } - } - - if name.contains('/') { - Err(Error::invalid_anchor(name.to_string())) - } else { - Err(Error::no_such_anchor(name.to_string())) - } - } -} - +pub(super) type DocumentStore<'a> = AHashMap>, Arc>>; /// Build prepared local index data for all documents already in `documents`. /// Used by `build_from_meta_schemas` for the static SPECIFICATIONS registry. -fn build_prepared_index_for_documents<'a>( +pub(super) fn build_prepared_index_for_documents<'a>( documents: &DocumentStore<'a>, resolution_cache: &mut UriCache, ) -> Result, Error> { @@ -858,10 +100,8 @@ fn build_prepared_index_for_documents<'a>( )?; } else { let mut local_seen = LocalSeen::new(); - let mut owned_reference_scratch = - crate::specification::BorrowedReferenceSlots::default(); - let mut owned_child_scratch: Vec> = - Vec::new(); + let mut owned_reference_scratch = crate::spec::ReferenceSlots::default(); + let mut owned_child_scratch: Vec> = Vec::new(); process_owned_document( Arc::clone(doc_uri), doc_uri, @@ -879,8 +119,7 @@ fn build_prepared_index_for_documents<'a>( } Ok(state.index_data) } - -type KnownResources = AHashSet>; +pub(super) type KnownResources = AHashSet>; #[derive(Hash, Eq, PartialEq)] struct ReferenceKey { @@ -1074,7 +313,7 @@ struct ProcessingState<'a> { /// pre-stored value address; non-subresource paths (e.g. `#/components/schemas/Foo`) /// are still fully traversed. 
deferred_refs: Vec, - borrowed_reference_scratch: crate::specification::BorrowedReferenceSlots<'a>, + borrowed_reference_scratch: crate::spec::ReferenceSlots<'a>, borrowed_child_scratch: Vec<(&'a Value, Draft)>, index_data: PreparedIndex<'a>, } @@ -1090,13 +329,12 @@ impl ProcessingState<'_> { custom_metaschemas: Vec::new(), visited_schemas: AHashSet::new(), deferred_refs: Vec::new(), - borrowed_reference_scratch: crate::specification::BorrowedReferenceSlots::default(), + borrowed_reference_scratch: crate::spec::ReferenceSlots::default(), borrowed_child_scratch: Vec::new(), index_data: PreparedIndex::default(), } } } - fn process_input_resources_mixed<'a>( pairs: impl IntoIterator, PendingResource<'a>)>, documents: &mut DocumentStore<'a>, @@ -1185,7 +423,7 @@ fn process_queue<'r>( continue; } let mut document_local_seen = LocalSeen::new(); - let mut owned_reference_scratch = crate::specification::BorrowedReferenceSlots::default(); + let mut owned_reference_scratch = crate::spec::ReferenceSlots::default(); let mut owned_child_scratch = Vec::new(); process_owned_document( base, @@ -1257,11 +495,11 @@ fn explore_borrowed_subtree<'r>( if let Some(object) = object { crate::observe_registry!("registry.borrowed.object_len={}", object.len()); } - let probe = object.map(|schema| draft.probe_borrowed_object_map(schema)); - if let Some(probe) = probe.as_ref() { + let scan = object.map(|schema| draft.scan_object(schema)); + if let Some(scan) = scan.as_ref() { #[cfg(feature = "perf-observe-registry")] { - let id_scan = match (probe.id.is_some(), probe.has_anchor) { + let id_scan = match (scan.id.is_some(), scan.has_anchor) { (false, false) => "none", (true, false) => "id_only", (false, true) => "anchor_only", @@ -1269,7 +507,7 @@ fn explore_borrowed_subtree<'r>( }; crate::observe_registry!("registry.id_scan={id_scan}"); } - if let Some(id) = probe.id { + if let Some(id) = scan.id { let original_base_uri = Arc::clone(¤t_base_uri); current_base_uri = resolve_id(¤t_base_uri, id, 
resolution_cache)?; known_resources.insert((*current_base_uri).clone()); @@ -1283,7 +521,7 @@ fn explore_borrowed_subtree<'r>( subschema, ); } - } else if probe.has_anchor && !is_root_entry { + } else if scan.has_anchor && !is_root_entry { insert_borrowed_discovered_index_entries( &mut state.index_data, ¤t_base_uri, @@ -1294,8 +532,8 @@ fn explore_borrowed_subtree<'r>( } } - if let (Some(schema), Some(probe)) = (object, probe.as_ref()) { - if probe.has_ref_or_schema { + if let (Some(schema), Some(scan)) = (object, scan.as_ref()) { + if scan.has_ref_or_schema { let child_start = state.borrowed_child_scratch.len(); draft.scan_borrowed_object_into_scratch_map( schema, @@ -1413,7 +651,7 @@ fn explore_borrowed_subtree<'r>( } let subschema_ptr = std::ptr::from_ref::(subschema) as usize; if state.visited_schemas.insert(subschema_ptr) - && probe.as_ref().is_none_or(|probe| probe.has_ref_or_schema) + && scan.as_ref().is_none_or(|scan| scan.has_ref_or_schema) { collect_external_resources( ¤t_base_uri, @@ -1460,8 +698,8 @@ fn process_owned_document<'a, 'r>( state: &mut ProcessingState<'r>, known_resources: &mut KnownResources, resolution_cache: &mut UriCache, - owned_reference_scratch: &mut crate::specification::BorrowedReferenceSlots<'a>, - owned_child_scratch: &mut Vec>, + owned_reference_scratch: &mut crate::spec::ReferenceSlots<'a>, + owned_child_scratch: &mut Vec>, local_seen: &mut LocalSeen<'a>, ) -> Result<(), Error> { let document_root = document.contents(); @@ -1533,8 +771,8 @@ fn explore_owned_subtree<'a, 'r>( state: &mut ProcessingState<'r>, known_resources: &mut KnownResources, resolution_cache: &mut UriCache, - owned_reference_scratch: &mut crate::specification::BorrowedReferenceSlots<'a>, - owned_child_scratch: &mut Vec>, + owned_reference_scratch: &mut crate::spec::ReferenceSlots<'a>, + owned_child_scratch: &mut Vec>, local_seen: &mut LocalSeen<'a>, ) -> Result<(), Error> { let object = subschema.as_object(); @@ -1543,10 +781,10 @@ fn explore_owned_subtree<'a, 
'r>( crate::observe_registry!("registry.owned.object_len={}", object.len()); } let child_start = owned_child_scratch.len(); - let gate = object.map(|schema| draft.owned_object_gate_map(schema)); - let (id, has_anchors) = gate + let info = object.map(|schema| draft.object_info(schema)); + let (id, has_anchors) = info .as_ref() - .map_or((None, false), |gate| (gate.id, gate.has_anchor)); + .map_or((None, false), |info| (info.id, info.has_anchor)); if let Some(id) = id { let original_base_uri = Arc::clone(¤t_base_uri); current_base_uri = resolve_id(¤t_base_uri, id, resolution_cache)?; @@ -1579,13 +817,13 @@ fn explore_owned_subtree<'a, 'r>( ); } - if let (Some(schema), Some(gate)) = (object, gate.as_ref()) { - if gate.ref_.is_some() || gate.schema.is_some() { + if let (Some(schema), Some(info)) = (object, info.as_ref()) { + if info.ref_.is_some() || info.schema.is_some() { #[cfg(feature = "perf-observe-registry")] { let kind = if schema.len() == 1 { "ref_only_leaf" - } else if gate.has_children { + } else if info.has_children { "ref_with_children" } else { "ref_no_children" @@ -1593,8 +831,8 @@ fn explore_owned_subtree<'a, 'r>( crate::observe_registry!("registry.owned.gate={kind}"); } if schema.len() == 1 { - owned_reference_scratch.ref_ = gate.ref_; - owned_reference_scratch.schema = gate.schema; + owned_reference_scratch.ref_ = info.ref_; + owned_reference_scratch.schema = info.schema; let subschema_ptr = std::ptr::from_ref::(subschema) as usize; if state.visited_schemas.insert(subschema_ptr) { @@ -1618,7 +856,7 @@ fn explore_owned_subtree<'a, 'r>( return Ok(()); } - if gate.has_children { + if info.has_children { let (_, _) = draft.scan_owned_object_into_scratch_map( schema, owned_reference_scratch, @@ -1673,8 +911,8 @@ fn explore_owned_subtree<'a, 'r>( return Ok(()); } - owned_reference_scratch.ref_ = gate.ref_; - owned_reference_scratch.schema = gate.schema; + owned_reference_scratch.ref_ = info.ref_; + owned_reference_scratch.schema = info.schema; let 
subschema_ptr = std::ptr::from_ref::(subschema) as usize; if state.visited_schemas.insert(subschema_ptr) { collect_external_resources_from_slots( @@ -1900,7 +1138,7 @@ fn run_sync_processing_loop<'a>( Ok(()) } -fn process_resources_mixed<'a>( +pub(super) fn process_resources_mixed<'a>( pairs: impl IntoIterator, PendingResource<'a>)>, retriever: &dyn Retrieve, documents: &mut DocumentStore<'a>, @@ -1928,7 +1166,7 @@ fn process_resources_mixed<'a>( } #[cfg(feature = "retrieve-async")] -async fn process_resources_async_mixed<'a>( +pub(super) async fn process_resources_async_mixed<'a>( pairs: impl IntoIterator, PendingResource<'a>)>, retriever: &dyn crate::AsyncRetrieve, documents: &mut DocumentStore<'a>, @@ -2084,21 +1322,21 @@ fn handle_retrieve_error( fn with_owned_child_path( path: &JsonPointerNode<'_, '_>, - child: &crate::specification::OwnedScratchChild<'_>, + child: &crate::spec::ChildNode<'_>, f: impl FnOnce(&JsonPointerNode<'_, '_>) -> R, ) -> R { - use crate::specification::OwnedPathSegment; + use crate::spec::PathSegment; let first = match child.first { - OwnedPathSegment::Key(key) => path.push(key), - OwnedPathSegment::Index(index) => path.push(index), + PathSegment::Key(key) => path.push(key), + PathSegment::Index(index) => path.push(index), }; match child.second { - Some(OwnedPathSegment::Key(key)) => { + Some(PathSegment::Key(key)) => { let second = first.push(key); f(&second) } - Some(OwnedPathSegment::Index(index)) => { + Some(PathSegment::Index(index)) => { let second = first.push(index); f(&second) } @@ -2109,7 +1347,7 @@ fn with_owned_child_path( fn collect_external_resources_from_slots<'doc>( base: &Arc>, root: &'doc Value, - references: &crate::specification::BorrowedReferenceSlots<'doc>, + references: &crate::spec::ReferenceSlots<'doc>, collected: &mut AHashSet<(String, Uri, ReferenceKind)>, seen: &mut ReferenceTracker, resolution_cache: &mut UriCache, @@ -2198,7 +1436,7 @@ fn collect_external_resources_from_slots<'doc>( Ok(()) } -fn 
validate_custom_metaschemas( +pub(super) fn validate_custom_metaschemas( custom_metaschemas: &[String], known_resources: &KnownResources, ) -> Result<(), Error> { @@ -2476,39 +1714,6 @@ fn resolve_id( } Ok(Arc::new(resolved)) } - -/// Look up a value by a JSON Pointer. -/// -/// **NOTE**: A slightly faster version of pointer resolution based on `Value::pointer` from `serde_json`. -pub fn pointer<'a>(document: &'a Value, pointer: &str) -> Option<&'a Value> { - crate::observe_registry!( - "registry.pointer_segments={}", - bytecount::count(pointer.as_bytes(), b'/') - ); - if pointer.is_empty() { - return Some(document); - } - if !pointer.starts_with('/') { - return None; - } - pointer.split('/').skip(1).map(unescape_segment).try_fold( - document, - |target, token| match target { - Value::Object(map) => map.get(&*token), - Value::Array(list) => parse_index(&token).and_then(|x| list.get(x)), - _ => None, - }, - ) -} - -// Taken from `serde_json`. -#[must_use] -pub fn parse_index(s: &str) -> Option { - if s.starts_with('+') || (s.starts_with('0') && s.len() != 1) { - return None; - } - s.parse().ok() -} #[cfg(test)] mod tests { use std::{error::Error as _, sync::Arc}; @@ -2518,41 +1723,13 @@ mod tests { use serde_json::{json, Value}; use test_case::test_case; - use crate::{uri::from_str, Anchor, Draft, JsonPointerNode, Registry, Resource, Retrieve}; + use crate::{uri::from_str, Anchor, Draft, Registry, Resource, Retrieve}; use super::{ - insert_root_index_entries, pointer, process_borrowed_document, process_owned_document, - IndexedResource, KnownResources, LocalSeen, ParsedPointer, ProcessingState, StoredDocument, - SPECIFICATIONS, + insert_root_index_entries, process_borrowed_document, process_owned_document, + IndexedResource, KnownResources, LocalSeen, ProcessingState, StoredDocument, }; - use crate::cache::UriCache; - - #[test] - fn test_empty_pointer() { - let document = json!({}); - assert_eq!(pointer(&document, ""), Some(&document)); - } - - #[test] - fn 
test_parsed_pointer_from_json_pointer_node_matches_pointer_lookup() { - let document = json!({ - "$defs": { - "foo/bar": [ - {"value": true} - ] - } - }); - let root = JsonPointerNode::new(); - let defs = root.push("$defs"); - let entry = defs.push("foo/bar"); - let node = entry.push(0); - - let parsed = ParsedPointer::from_pointer_node(&node); - assert_eq!( - parsed.lookup(&document), - pointer(&document, "/$defs/foo~1bar/0") - ); - } + use crate::{cache::UriCache, registry::SPECIFICATIONS}; #[test] fn test_invalid_uri_on_registry_creation() { @@ -2605,7 +1782,7 @@ mod tests { .expect("Invalid resources") .prepare() .expect("Invalid resources"); - assert!(registry.contains_resource_uri("urn:root")); + assert!(registry.contains_resource("urn:root")); } #[test] @@ -2745,8 +1922,8 @@ mod tests { let mut state = ProcessingState::new(); let mut known_resources = KnownResources::default(); let mut resolution_cache = UriCache::new(); - let mut owned_reference_scratch = crate::specification::BorrowedReferenceSlots::default(); - let mut owned_child_scratch: Vec> = Vec::new(); + let mut owned_reference_scratch = crate::spec::ReferenceSlots::default(); + let mut owned_child_scratch: Vec> = Vec::new(); let mut local_seen = LocalSeen::new(); known_resources.insert((*doc_key).clone()); @@ -2790,8 +1967,8 @@ mod tests { let mut state = ProcessingState::new(); let mut known_resources = KnownResources::default(); let mut resolution_cache = UriCache::new(); - let mut owned_reference_scratch = crate::specification::BorrowedReferenceSlots::default(); - let mut owned_child_scratch: Vec> = Vec::new(); + let mut owned_reference_scratch = crate::spec::ReferenceSlots::default(); + let mut owned_child_scratch: Vec> = Vec::new(); let mut local_seen = LocalSeen::new(); known_resources.insert((*doc_key).clone()); @@ -3057,7 +2234,7 @@ mod tests { .prepare() .expect("Registry should prepare"); - assert!(registry.contains_resource_uri("http://example.com/remote")); + 
assert!(registry.contains_resource("http://example.com/remote")); } struct TestCase { @@ -3277,7 +2454,7 @@ mod tests { .prepare() .expect("Registry should prepare"); - assert!(registry.contains_resource_uri("urn:test")); + assert!(registry.contains_resource("urn:test")); } #[test] @@ -3294,10 +2471,10 @@ mod tests { .prepare() .expect("Registry should prepare"); - assert!(original.contains_resource_uri("urn:one")); - assert!(!original.contains_resource_uri("urn:two")); - assert!(registry.contains_resource_uri("urn:one")); - assert!(registry.contains_resource_uri("urn:two")); + assert!(original.contains_resource("urn:one")); + assert!(!original.contains_resource("urn:two")); + assert!(registry.contains_resource("urn:one")); + assert!(registry.contains_resource("urn:two")); } #[test] @@ -3309,7 +2486,7 @@ mod tests { .prepare() .expect("Registry should prepare"); - assert!(registry.contains_resource_uri("urn:test")); + assert!(registry.contains_resource("urn:test")); } #[test] @@ -3459,10 +2636,10 @@ mod tests { .prepare() .expect("Registry should prepare"); - assert!(original.contains_resource_uri("urn:one")); - assert!(!original.contains_resource_uri("urn:two")); - assert!(registry.contains_resource_uri("urn:one")); - assert!(registry.contains_resource_uri("urn:two")); + assert!(original.contains_resource("urn:one")); + assert!(!original.contains_resource("urn:two")); + assert!(registry.contains_resource("urn:one")); + assert!(registry.contains_resource("urn:two")); } #[test] @@ -3470,7 +2647,7 @@ mod tests { let resource = Draft::Draft202012.create_resource(json!({"$schema": "$##"})); let _ = Registry::new() .add("http://#/", resource) - .and_then(super::RegistryBuilder::prepare); + .and_then(crate::registry::RegistryBuilder::prepare); } } diff --git a/crates/jsonschema-referencing/src/registry/index.rs b/crates/jsonschema-referencing/src/registry/index.rs new file mode 100644 index 00000000..4abacdfb --- /dev/null +++ 
b/crates/jsonschema-referencing/src/registry/index.rs @@ -0,0 +1,109 @@ +//! Lookup structures produced by the build pass. +//! +//! Each [`IndexedResource`] and [`IndexedAnchor`] has two variants: +//! - `Borrowed`: the schema value is referenced in place from the caller — no allocation. +//! - `Owned`: the schema value is stored behind an [`Arc`]; a [`ParsedPointer`] locates +//! the sub-schema within the document. +//! +//! This split avoids cloning schema values when the caller's data outlives the registry. + +use std::sync::Arc; + +use fluent_uri::Uri; + +use crate::{ + anchor::Anchor, draft::Draft, pointer::ParsedPointer, small_map::SmallMap, ResourceRef, +}; + +use super::build::StoredDocument; + +pub(super) type AnchorName = Box; + +/// Lookup tables mapping canonical URIs to resources and anchors. +#[derive(Debug, Clone, Default)] +pub(super) struct PreparedIndex<'a> { + pub(super) resources: SmallMap>, IndexedResource<'a>>, + pub(super) anchors: SmallMap>, SmallMap>>, +} + +/// A schema resource in the index: either borrowed from the caller or owned by the registry. +#[derive(Debug, Clone)] +pub(super) enum IndexedResource<'a> { + Borrowed(ResourceRef<'a>), + Owned { + document: Arc>, + pointer: ParsedPointer, + draft: Draft, + }, +} + +impl IndexedResource<'_> { + #[inline] + pub(super) fn resolve(&self) -> Option> { + match self { + IndexedResource::Borrowed(resource) => { + Some(ResourceRef::new(resource.contents(), resource.draft())) + } + IndexedResource::Owned { + document, + pointer, + draft, + } => { + let contents = pointer.lookup(document.contents())?; + Some(ResourceRef::new(contents, *draft)) + } + } + } +} + +/// An anchor in the index: either borrowed from the caller or owned by the registry. 
+#[derive(Debug, Clone)] +pub(super) enum IndexedAnchor<'a> { + Borrowed(Anchor<'a>), + Owned { + document: Arc>, + pointer: ParsedPointer, + draft: Draft, + kind: IndexedAnchorKind, + name: Box, + }, +} + +impl IndexedAnchor<'_> { + #[inline] + pub(super) fn resolve(&self) -> Option> { + match self { + IndexedAnchor::Borrowed(anchor) => Some(match anchor { + Anchor::Default { name, resource } => Anchor::Default { + name, + resource: ResourceRef::new(resource.contents(), resource.draft()), + }, + Anchor::Dynamic { name, resource } => Anchor::Dynamic { + name, + resource: ResourceRef::new(resource.contents(), resource.draft()), + }, + }), + IndexedAnchor::Owned { + document, + pointer, + draft, + kind, + name, + } => { + let contents = pointer.lookup(document.contents())?; + let resource = ResourceRef::new(contents, *draft); + Some(match kind { + IndexedAnchorKind::Default => Anchor::Default { name, resource }, + IndexedAnchorKind::Dynamic => Anchor::Dynamic { name, resource }, + }) + } + } + } +} + +/// Whether an anchor is a plain anchor (`$anchor`) or a dynamic anchor (`$dynamicAnchor`). +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub(super) enum IndexedAnchorKind { + Default, + Dynamic, +} diff --git a/crates/jsonschema-referencing/src/registry/input.rs b/crates/jsonschema-referencing/src/registry/input.rs new file mode 100644 index 00000000..904630b4 --- /dev/null +++ b/crates/jsonschema-referencing/src/registry/input.rs @@ -0,0 +1,133 @@ +//! Input normalisation for resources entering the registry. +//! +//! [`PendingResource`] is a 2x2 enum covering every combination of: +//! - **value type**: raw [`Value`] vs typed [`Resource`] / [`ResourceRef`] +//! - **ownership**: owned (caller transfers) vs borrowed (caller retains, lifetime `'a`) +//! +//! [`IntoRegistryResource`] is sealed so only the four approved input forms can enter +//! the pipeline — implementing it from outside the crate is intentionally prevented. 
+ +use std::sync::Arc; + +use ahash::AHashMap; +use fluent_uri::Uri; +use serde_json::Value; + +use crate::{Resource, ResourceRef, Retrieve}; + +/// A resource waiting to enter the registry. +#[derive(Clone)] +pub(crate) enum PendingResource<'a> { + OwnedValue(Value), + BorrowedValue(&'a Value), + OwnedResource(Resource), + BorrowedResource(ResourceRef<'a>), +} + +pub(crate) mod private { + use ahash::AHashMap; + use fluent_uri::Uri; + + use super::PendingResource; + + pub(crate) trait Sealed<'a> { + fn insert_into( + self, + pending: &mut AHashMap, PendingResource<'a>>, + uri: Uri, + ); + } +} + +#[allow(private_bounds)] +pub trait IntoRegistryResource<'a>: private::Sealed<'a> {} + +impl<'a, T> IntoRegistryResource<'a> for T where T: private::Sealed<'a> {} + +impl<'a> private::Sealed<'a> for Resource { + fn insert_into( + self, + pending: &mut AHashMap, PendingResource<'a>>, + uri: Uri, + ) { + pending.insert(uri, PendingResource::OwnedResource(self)); + } +} + +impl<'a> private::Sealed<'a> for &'a Resource { + fn insert_into( + self, + pending: &mut AHashMap, PendingResource<'a>>, + uri: Uri, + ) { + pending.insert( + uri, + PendingResource::BorrowedResource(ResourceRef::new(self.contents(), self.draft())), + ); + } +} + +impl<'a> private::Sealed<'a> for &'a Value { + fn insert_into( + self, + pending: &mut AHashMap, PendingResource<'a>>, + uri: Uri, + ) { + pending.insert(uri, PendingResource::BorrowedValue(self)); + } +} + +impl<'a> private::Sealed<'a> for ResourceRef<'a> { + fn insert_into( + self, + pending: &mut AHashMap, PendingResource<'a>>, + uri: Uri, + ) { + pending.insert(uri, PendingResource::BorrowedResource(self)); + } +} + +impl<'a> private::Sealed<'a> for Value { + fn insert_into( + self, + pending: &mut AHashMap, PendingResource<'a>>, + uri: Uri, + ) { + pending.insert(uri, PendingResource::OwnedValue(self)); + } +} + +pub trait IntoRetriever { + fn into_retriever(self) -> Arc; +} + +impl IntoRetriever for T { + fn into_retriever(self) -> Arc { + 
Arc::new(self) + } +} + +impl IntoRetriever for Arc { + fn into_retriever(self) -> Arc { + self + } +} + +#[cfg(feature = "retrieve-async")] +pub trait IntoAsyncRetriever { + fn into_retriever(self) -> Arc; +} + +#[cfg(feature = "retrieve-async")] +impl IntoAsyncRetriever for T { + fn into_retriever(self) -> Arc { + Arc::new(self) + } +} + +#[cfg(feature = "retrieve-async")] +impl IntoAsyncRetriever for Arc { + fn into_retriever(self) -> Arc { + self + } +} diff --git a/crates/jsonschema-referencing/src/registry/mod.rs b/crates/jsonschema-referencing/src/registry/mod.rs new file mode 100644 index 00000000..be0f7468 --- /dev/null +++ b/crates/jsonschema-referencing/src/registry/mod.rs @@ -0,0 +1,543 @@ +use std::{ + fmt, + sync::{Arc, LazyLock}, +}; + +use ahash::AHashMap; +use fluent_uri::Uri; +use serde_json::Value; + +use crate::{ + cache::{SharedUriCache, UriCache}, + uri, + vocabularies::{self, VocabularySet}, + Anchor, DefaultRetriever, Draft, Error, Resolver, ResourceRef, Retrieve, +}; + +mod build; +#[cfg(feature = "retrieve-async")] +use build::process_resources_async_mixed; +use build::{ + build_prepared_index_for_documents, process_resources_mixed, validate_custom_metaschemas, + DocumentStore, KnownResources, StoredDocument, +}; + +mod index; +use index::{IndexedAnchor, IndexedResource, PreparedIndex}; + +mod input; +#[cfg(feature = "retrieve-async")] +pub(crate) use input::IntoAsyncRetriever; +pub use input::IntoRegistryResource; +pub(crate) use input::{IntoRetriever, PendingResource}; + +/// Pre-loaded registry containing all JSON Schema meta-schemas and their vocabularies +pub static SPECIFICATIONS: LazyLock> = + LazyLock::new(|| Registry::build_from_meta_schemas(crate::meta::META_SCHEMAS_ALL.as_slice())); + +#[derive(Clone)] +pub struct RegistryBuilder<'a> { + baseline: Option<&'a Registry<'a>>, + pending: AHashMap, PendingResource<'a>>, + retriever: Arc, + #[cfg(feature = "retrieve-async")] + async_retriever: Option>, + draft: Option, +} + +impl 
fmt::Debug for RegistryBuilder<'_> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("RegistryBuilder") + .field("has_baseline", &self.baseline.is_some()) + .field("pending_len", &self.pending.len()) + .field("draft", &self.draft) + .finish() + } +} + +/// A registry of JSON Schema resources, each identified by their canonical URIs. +/// +/// `Registry` is a prepared registry: add resources with [`Registry::new`] and +/// [`RegistryBuilder::add`], then call [`RegistryBuilder::prepare`] to build the +/// reusable registry. To resolve `$ref` references directly, create a [`Resolver`] +/// from the prepared registry: +/// +/// ```rust +/// use referencing::Registry; +/// +/// # fn main() -> Result<(), Box> { +/// let schema = serde_json::json!({ +/// "$schema": "https://json-schema.org/draft/2020-12/schema", +/// "$id": "https://example.com/root", +/// "$defs": { "item": { "type": "string" } }, +/// "items": { "$ref": "#/$defs/item" } +/// }); +/// +/// let registry = Registry::new() +/// .add("https://example.com/root", schema)? 
+/// .prepare()?; +/// +/// let resolver = registry.resolver(referencing::uri::from_str("https://example.com/root")?); +/// # Ok(()) +/// # } +/// ``` +#[derive(Debug, Clone)] +pub struct Registry<'a> { + baseline: Option<&'a Registry<'a>>, + resolution_cache: SharedUriCache, + known_resources: KnownResources, + index_data: PreparedIndex<'a>, +} + +impl<'a> RegistryBuilder<'a> { + fn new() -> Self { + Self { + baseline: None, + pending: AHashMap::new(), + retriever: Arc::new(DefaultRetriever), + #[cfg(feature = "retrieve-async")] + async_retriever: None, + draft: None, + } + } + + fn from_registry(registry: &'a Registry<'a>) -> Self { + Self { + baseline: Some(registry), + pending: AHashMap::new(), + retriever: Arc::new(DefaultRetriever), + #[cfg(feature = "retrieve-async")] + async_retriever: None, + draft: None, + } + } + + #[must_use] + pub fn draft(mut self, draft: Draft) -> Self { + self.draft = Some(draft); + self + } + + #[must_use] + pub fn retriever(mut self, retriever: impl IntoRetriever) -> Self { + self.retriever = retriever.into_retriever(); + self + } + + #[cfg(feature = "retrieve-async")] + #[must_use] + pub fn async_retriever(mut self, retriever: impl IntoAsyncRetriever) -> Self { + self.async_retriever = Some(retriever.into_retriever()); + self + } + + /// Add a resource to the registry builder. + /// + /// # Errors + /// + /// Returns an error if the URI is invalid. 
+ pub fn add<'b>( + self, + uri: impl AsRef, + resource: impl IntoRegistryResource<'b>, + ) -> Result, Error> + where + 'a: 'b, + { + let parsed = uri::from_str(uri.as_ref().trim_end_matches('#'))?; + let mut pending: AHashMap, PendingResource<'b>> = + self.pending.into_iter().collect(); + input::private::Sealed::insert_into(resource, &mut pending, parsed); + Ok(RegistryBuilder { + baseline: self.baseline, + pending, + retriever: self.retriever, + #[cfg(feature = "retrieve-async")] + async_retriever: self.async_retriever, + draft: self.draft, + }) + } + + /// Add multiple resources to the registry builder. + /// + /// # Errors + /// + /// Returns an error if any URI is invalid. + pub fn extend<'b, I, U, T>(self, pairs: I) -> Result, Error> + where + 'a: 'b, + I: IntoIterator, + U: AsRef, + T: IntoRegistryResource<'b>, + { + let mut builder = RegistryBuilder { + baseline: self.baseline, + pending: self.pending.into_iter().collect(), + retriever: self.retriever, + #[cfg(feature = "retrieve-async")] + async_retriever: self.async_retriever, + draft: self.draft, + }; + for (uri, resource) in pairs { + builder = builder.add(uri, resource)?; + } + Ok(builder) + } + + /// Prepare the registry for reuse. + /// + /// # Errors + /// + /// Returns an error if URI processing, retrieval, or custom meta-schema validation fails. + pub fn prepare(self) -> Result, Error> { + if let Some(baseline) = self.baseline { + baseline.try_with_pending_resources_and_retriever( + self.pending, + &*self.retriever, + self.draft, + ) + } else { + Registry::try_from_pending_resources_impl(self.pending, &*self.retriever, self.draft) + } + } + + #[cfg(feature = "retrieve-async")] + /// Prepare the registry for reuse with async retrieval. + /// + /// # Errors + /// + /// Returns an error if URI processing, retrieval, or custom meta-schema validation fails. 
+ pub async fn async_prepare(self) -> Result, Error> { + let retriever = self + .async_retriever + .unwrap_or_else(|| Arc::new(DefaultRetriever)); + if let Some(baseline) = self.baseline { + baseline + .try_with_pending_resources_and_retriever_async( + self.pending, + &*retriever, + self.draft, + ) + .await + } else { + Registry::try_from_pending_resources_async_impl(self.pending, &*retriever, self.draft) + .await + } + } +} + +impl<'a> Registry<'a> { + /// Add a resource to a prepared registry, returning a builder that must be prepared again. + /// + /// # Errors + /// + /// Returns an error if the URI is invalid. + pub fn add<'b>( + &'b self, + uri: impl AsRef, + resource: impl IntoRegistryResource<'b>, + ) -> Result, Error> + where + 'a: 'b, + { + RegistryBuilder::from_registry(self).add(uri, resource) + } + + /// Add multiple resources to a prepared registry, returning a builder that + /// must be prepared again. + /// + /// # Errors + /// + /// Returns an error if any URI is invalid. 
+ pub fn extend<'b, I, U, T>(&'b self, pairs: I) -> Result, Error> + where + 'a: 'b, + I: IntoIterator, + U: AsRef, + T: IntoRegistryResource<'b>, + { + RegistryBuilder::from_registry(self).extend(pairs) + } +} + +impl Registry<'static> { + #[allow(clippy::new_ret_no_self)] + #[must_use] + pub fn new<'a>() -> RegistryBuilder<'a> { + RegistryBuilder::new() + } + + fn try_from_pending_resources_impl<'a>( + pairs: impl IntoIterator, PendingResource<'a>)>, + retriever: &dyn Retrieve, + draft: Option, + ) -> Result, Error> { + let mut documents = DocumentStore::new(); + let mut known_resources = KnownResources::new(); + let mut resolution_cache = UriCache::new(); + + let (custom_metaschemas, index_data) = process_resources_mixed( + pairs, + retriever, + &mut documents, + &mut known_resources, + &mut resolution_cache, + draft, + )?; + + validate_custom_metaschemas(&custom_metaschemas, &known_resources)?; + + Ok(Registry { + baseline: None, + resolution_cache: resolution_cache.into_shared(), + known_resources, + index_data, + }) + } + + #[cfg(feature = "retrieve-async")] + async fn try_from_pending_resources_async_impl<'a>( + pairs: impl IntoIterator, PendingResource<'a>)>, + retriever: &dyn crate::AsyncRetrieve, + draft: Option, + ) -> Result, Error> { + let mut documents = DocumentStore::new(); + let mut known_resources = KnownResources::new(); + let mut resolution_cache = UriCache::new(); + + let (custom_metaschemas, index_data) = process_resources_async_mixed( + pairs, + retriever, + &mut documents, + &mut known_resources, + &mut resolution_cache, + draft, + ) + .await?; + + validate_custom_metaschemas(&custom_metaschemas, &known_resources)?; + + Ok(Registry { + baseline: None, + resolution_cache: resolution_cache.into_shared(), + known_resources, + index_data, + }) + } + + /// Build a registry with all the given meta-schemas from specs. 
+ pub(crate) fn build_from_meta_schemas(schemas: &[(&'static str, &'static Value)]) -> Self { + let mut documents = DocumentStore::with_capacity(schemas.len()); + let mut known_resources = KnownResources::with_capacity(schemas.len()); + + for (uri, schema) in schemas { + let parsed = + uri::from_str(uri.trim_end_matches('#')).expect("meta-schema URI must be valid"); + let key = Arc::new(parsed); + let draft = Draft::default().detect(schema); + known_resources.insert((*key).clone()); + documents.insert(key, Arc::new(StoredDocument::borrowed(schema, draft))); + } + + let mut resolution_cache = UriCache::with_capacity(35); + let index_data = build_prepared_index_for_documents(&documents, &mut resolution_cache) + .expect("meta-schema index data must build"); + + Self { + baseline: None, + resolution_cache: resolution_cache.into_shared(), + known_resources, + index_data, + } + } +} + +impl<'a> Registry<'a> { + fn try_with_pending_resources_and_retriever( + &'a self, + pairs: impl IntoIterator, PendingResource<'a>)>, + retriever: &dyn Retrieve, + draft: Option, + ) -> Result, Error> { + let mut documents = DocumentStore::new(); + let mut resolution_cache = UriCache::new(); + let mut known_resources = self.known_resources.clone(); + + let (custom_metaschemas, index_data) = process_resources_mixed( + pairs, + retriever, + &mut documents, + &mut known_resources, + &mut resolution_cache, + draft, + )?; + validate_custom_metaschemas(&custom_metaschemas, &known_resources)?; + + Ok(Registry { + baseline: Some(self), + resolution_cache: resolution_cache.into_shared(), + known_resources, + index_data, + }) + } + + #[cfg(feature = "retrieve-async")] + async fn try_with_pending_resources_and_retriever_async( + &'a self, + pairs: impl IntoIterator, PendingResource<'a>)>, + retriever: &dyn crate::AsyncRetrieve, + draft: Option, + ) -> Result, Error> { + let mut documents = DocumentStore::new(); + let mut resolution_cache = UriCache::new(); + let mut known_resources = 
self.known_resources.clone(); + + let (custom_metaschemas, index_data) = process_resources_async_mixed( + pairs, + retriever, + &mut documents, + &mut known_resources, + &mut resolution_cache, + draft, + ) + .await?; + validate_custom_metaschemas(&custom_metaschemas, &known_resources)?; + + Ok(Registry { + baseline: Some(self), + resolution_cache: resolution_cache.into_shared(), + known_resources, + index_data, + }) + } + + /// Returns `true` if the registry contains a resource at the given URI. + /// + /// Returns `false` if the URI is malformed. + #[must_use] + pub fn contains_resource(&self, uri: &str) -> bool { + let Ok(uri) = uri::from_str(uri) else { + return false; + }; + self.resource_by_uri(&uri).is_some() + } + + /// Returns `true` if the registry contains an anchor named `name` at the given URI. + /// + /// Returns `false` if the URI is malformed. + #[must_use] + pub fn contains_anchor(&self, uri: &str, name: &str) -> bool { + let Ok(uri) = uri::from_str(uri) else { + return false; + }; + self.contains_anchor_uri(&uri, name) + } + + /// Creates a [`Resolver`] rooted at `base_uri`. + /// + /// The returned resolver borrows from this registry and cannot outlive it. + #[must_use] + pub fn resolver(&self, base_uri: Uri) -> Resolver<'_> { + Resolver::new(self, Arc::new(base_uri)) + } + + /// Returns the vocabulary set active for a schema with the given `contents`. + /// + /// Detects the draft from the `$schema` field. If no draft is detected or + /// the draft has no registered vocabularies, returns the default vocabulary + /// set — never errors. 
+ #[must_use] + pub fn find_vocabularies(&self, draft: Draft, contents: &Value) -> VocabularySet { + match draft.detect(contents) { + Draft::Unknown => { + if let Some(specification) = contents + .as_object() + .and_then(|obj| obj.get("$schema")) + .and_then(|s| s.as_str()) + { + if let Ok(mut uri) = uri::from_str(specification) { + uri.set_fragment(None); + if let Some(resource) = self.resource_by_uri(&uri) { + if let Ok(Some(vocabularies)) = vocabularies::find(resource.contents()) + { + return vocabularies; + } + } + } + } + Draft::Unknown.default_vocabularies() + } + draft => draft.default_vocabularies(), + } + } + + /// Resolves `uri` against `base` and returns the resulting absolute URI. + /// + /// Results are cached. Returns an error if `base` has no scheme or if + /// resolution fails. + /// + /// # Errors + /// + /// Returns an error if base has no schema or there is a fragment. + pub fn resolve_uri(&self, base: &Uri<&str>, uri: &str) -> Result>, Error> { + self.resolution_cache.resolve_against(base, uri) + } + + #[inline] + pub(crate) fn resource_by_uri(&self, uri: &Uri) -> Option> { + self.index_data + .resources + .get(uri) + .and_then(IndexedResource::resolve) + .or_else(|| { + self.baseline + .and_then(|baseline| baseline.resource_by_uri(uri)) + }) + } + + pub(crate) fn contains_anchor_uri(&self, uri: &Uri, name: &str) -> bool { + self.index_data + .anchors + .get(uri) + .is_some_and(|entries| entries.contains_key(name)) + || self + .baseline + .is_some_and(|baseline| baseline.contains_anchor_uri(uri, name)) + } + + pub(crate) fn anchor(&self, uri: &Uri, name: &str) -> Result, Error> { + if let Some(anchor) = self.anchor_exact(uri, name) { + return Ok(anchor); + } + + if let Some(resource) = self.resource_by_uri(uri) { + if let Some(id) = resource.id() { + let canonical = uri::from_str(id)?; + if let Some(anchor) = self.anchor_exact(&canonical, name) { + return Ok(anchor); + } + } + } + + if name.contains('/') { + 
Err(Error::invalid_anchor(name.to_string())) + } else { + Err(Error::no_such_anchor(name.to_string())) + } + } + + fn local_anchor_by_uri(&self, uri: &Uri, name: &str) -> Option> { + self.index_data + .anchors + .get(uri) + .and_then(|entries| entries.get(name)) + .and_then(IndexedAnchor::resolve) + } + + fn anchor_exact(&self, uri: &Uri, name: &str) -> Option> { + self.local_anchor_by_uri(uri, name).or_else(|| { + self.baseline + .and_then(|baseline| baseline.anchor_exact(uri, name)) + }) + } +} diff --git a/crates/jsonschema-referencing/src/resolver.rs b/crates/jsonschema-referencing/src/resolver.rs index 8937c440..f567ccde 100644 --- a/crates/jsonschema-referencing/src/resolver.rs +++ b/crates/jsonschema-referencing/src/resolver.rs @@ -54,6 +54,7 @@ impl<'r> Resolver<'r> { scopes: List::new(), } } + /// Returns the current base URI of this resolver. #[must_use] #[inline] pub fn base_uri(&self) -> Arc> { @@ -73,9 +74,7 @@ impl<'r> Resolver<'r> { } else { (reference, "") }; - let uri = self - .registry - .resolve_against(&self.base_uri.borrow(), uri)?; + let uri = self.registry.resolve_uri(&self.base_uri.borrow(), uri)?; (uri, fragment) }; @@ -166,7 +165,7 @@ impl<'r> Resolver<'r> { #[inline] pub fn in_subresource(&self, subresource: ResourceRef<'_>) -> Result { if let Some(id) = subresource.id() { - let base_uri = self.registry.resolve_against(&self.base_uri.borrow(), id)?; + let base_uri = self.registry.resolve_uri(&self.base_uri.borrow(), id)?; Ok(Resolver { registry: self.registry, base_uri, @@ -205,8 +204,8 @@ impl<'r> Resolver<'r> { /// /// If the reference is invalid. 
#[inline] - pub fn resolve_against(&self, base: &Uri<&str>, uri: &str) -> Result>, Error> { - self.registry.resolve_against(base, uri) + pub fn resolve_uri(&self, base: &Uri<&str>, uri: &str) -> Result>, Error> { + self.registry.resolve_uri(base, uri) } #[must_use] diff --git a/crates/jsonschema-referencing/src/specification/draft201909.rs b/crates/jsonschema-referencing/src/spec/draft201909.rs similarity index 93% rename from crates/jsonschema-referencing/src/specification/draft201909.rs rename to crates/jsonschema-referencing/src/spec/draft201909.rs index d5fce72c..caad3f53 100644 --- a/crates/jsonschema-referencing/src/specification/draft201909.rs +++ b/crates/jsonschema-referencing/src/spec/draft201909.rs @@ -1,13 +1,14 @@ use serde_json::{Map, Value}; use crate::{ - specification::{BorrowedReferenceSlots, Draft, OwnedObjectGate, OwnedScratchChild}, + draft::Draft, + spec::{ChildNode, ObjectInfo, ReferenceSlots}, Error, JsonPointerNode, Resolver, ResourceRef, Segments, }; -use super::subresources::{self, SubresourceIteratorInner}; +use super::draft202012::{self, SubresourceIteratorInner}; -pub(crate) fn owned_object_gate_map(schema: &Map) -> OwnedObjectGate<'_> { +pub(crate) fn object_info(schema: &Map) -> ObjectInfo<'_> { let mut id = None; let mut has_anchor = false; let mut ref_ = None; @@ -52,7 +53,7 @@ pub(crate) fn owned_object_gate_map(schema: &Map) -> OwnedObjectG } } - OwnedObjectGate { + ObjectInfo { id, has_anchor, ref_, @@ -64,7 +65,7 @@ pub(crate) fn owned_object_gate_map(schema: &Map) -> OwnedObjectG pub(crate) fn scan_borrowed_object_into_scratch_map<'a>( schema: &'a Map, draft: Draft, - references: &mut BorrowedReferenceSlots<'a>, + references: &mut ReferenceSlots<'a>, children: &mut Vec<(&'a Value, Draft)>, ) { for (key, value) in schema { @@ -132,8 +133,8 @@ pub(crate) fn scan_borrowed_object_into_scratch_map<'a>( pub(crate) fn scan_owned_object_into_scratch_map<'a>( schema: &'a Map, draft: Draft, - references: &mut BorrowedReferenceSlots<'a>, 
- children: &mut Vec>, + references: &mut ReferenceSlots<'a>, + children: &mut Vec>, ) -> (Option<&'a str>, bool) { let mut id = None; let mut has_anchor = false; @@ -163,16 +164,12 @@ pub(crate) fn scan_owned_object_into_scratch_map<'a>( | "then" | "unevaluatedItems" | "unevaluatedProperties" => { - children.push(OwnedScratchChild::key( - key.as_str(), - value, - draft.detect(value), - )); + children.push(ChildNode::key(key.as_str(), value, draft.detect(value))); } "allOf" | "anyOf" | "oneOf" => { if let Some(arr) = value.as_array() { for (index, item) in arr.iter().enumerate() { - children.push(OwnedScratchChild::key_index( + children.push(ChildNode::key_index( key.as_str(), index, item, @@ -184,7 +181,7 @@ pub(crate) fn scan_owned_object_into_scratch_map<'a>( "$defs" | "definitions" | "dependentSchemas" | "patternProperties" | "properties" => { if let Some(obj) = value.as_object() { for (child_key, child_value) in obj { - children.push(OwnedScratchChild::key_key( + children.push(ChildNode::key_key( key.as_str(), child_key.as_str(), child_value, @@ -196,7 +193,7 @@ pub(crate) fn scan_owned_object_into_scratch_map<'a>( "items" => match value { Value::Array(arr) => { for (index, item) in arr.iter().enumerate() { - children.push(OwnedScratchChild::key_index( + children.push(ChildNode::key_index( "items", index, item, @@ -204,7 +201,7 @@ pub(crate) fn scan_owned_object_into_scratch_map<'a>( )); } } - _ => children.push(OwnedScratchChild::key("items", value, draft.detect(value))), + _ => children.push(ChildNode::key("items", value, draft.detect(value))), }, "dependencies" => { if let Some(obj) = value.as_object() { @@ -212,7 +209,7 @@ pub(crate) fn scan_owned_object_into_scratch_map<'a>( if !child_value.is_object() { continue; } - children.push(OwnedScratchChild::key_key( + children.push(ChildNode::key_key( key.as_str(), child_key.as_str(), child_value, @@ -431,7 +428,7 @@ pub(crate) fn maybe_in_subresource<'r>( "properties", ]; - 
subresources::maybe_in_subresource_with_items_and_dependencies( + draft202012::maybe_in_subresource_with_items_and_dependencies( segments, resolver, subresource, diff --git a/crates/jsonschema-referencing/src/specification/subresources.rs b/crates/jsonschema-referencing/src/spec/draft202012.rs similarity index 97% rename from crates/jsonschema-referencing/src/specification/subresources.rs rename to crates/jsonschema-referencing/src/spec/draft202012.rs index fc8d3d20..d5460109 100644 --- a/crates/jsonschema-referencing/src/specification/subresources.rs +++ b/crates/jsonschema-referencing/src/spec/draft202012.rs @@ -4,15 +4,16 @@ use std::iter::FlatMap; use serde_json::{Map, Value}; use crate::{ + draft::Draft, segments::Segment, - specification::{BorrowedReferenceSlots, Draft, OwnedObjectGate, OwnedScratchChild}, + spec::{ChildNode, ObjectInfo, ReferenceSlots}, Error, JsonPointerNode, Resolver, ResourceRef, Segments, }; pub(crate) fn scan_borrowed_object_into_scratch_map<'a>( schema: &'a Map, draft: Draft, - references: &mut BorrowedReferenceSlots<'a>, + references: &mut ReferenceSlots<'a>, children: &mut Vec<(&'a Value, Draft)>, ) { for (key, value) in schema { @@ -59,7 +60,7 @@ pub(crate) fn scan_borrowed_object_into_scratch_map<'a>( } } -pub(crate) fn owned_object_gate_map(schema: &Map) -> OwnedObjectGate<'_> { +pub(crate) fn object_info(schema: &Map) -> ObjectInfo<'_> { let mut id = None; let mut has_anchor = false; let mut ref_ = None; @@ -93,7 +94,7 @@ pub(crate) fn owned_object_gate_map(schema: &Map) -> OwnedObjectG } } - OwnedObjectGate { + ObjectInfo { id, has_anchor, ref_, @@ -105,8 +106,8 @@ pub(crate) fn owned_object_gate_map(schema: &Map) -> OwnedObjectG pub(crate) fn scan_owned_object_into_scratch_map<'a>( schema: &'a Map, draft: Draft, - references: &mut BorrowedReferenceSlots<'a>, - children: &mut Vec>, + references: &mut ReferenceSlots<'a>, + children: &mut Vec>, ) -> (Option<&'a str>, bool) { let mut id = None; let mut has_anchor = false; @@ 
-136,16 +137,12 @@ pub(crate) fn scan_owned_object_into_scratch_map<'a>( | "then" | "unevaluatedItems" | "unevaluatedProperties" => { - children.push(OwnedScratchChild::key( - key.as_str(), - value, - draft.detect(value), - )); + children.push(ChildNode::key(key.as_str(), value, draft.detect(value))); } "allOf" | "anyOf" | "oneOf" | "prefixItems" => { if let Some(arr) = value.as_array() { for (index, item) in arr.iter().enumerate() { - children.push(OwnedScratchChild::key_index( + children.push(ChildNode::key_index( key.as_str(), index, item, @@ -157,7 +154,7 @@ pub(crate) fn scan_owned_object_into_scratch_map<'a>( "$defs" | "definitions" | "dependentSchemas" | "patternProperties" | "properties" => { if let Some(obj) = value.as_object() { for (child_key, child_value) in obj { - children.push(OwnedScratchChild::key_key( + children.push(ChildNode::key_key( key.as_str(), child_key.as_str(), child_value, diff --git a/crates/jsonschema-referencing/src/specification/draft4.rs b/crates/jsonschema-referencing/src/spec/draft4.rs similarity index 92% rename from crates/jsonschema-referencing/src/specification/draft4.rs rename to crates/jsonschema-referencing/src/spec/draft4.rs index 02335220..7fd3227b 100644 --- a/crates/jsonschema-referencing/src/specification/draft4.rs +++ b/crates/jsonschema-referencing/src/spec/draft4.rs @@ -1,13 +1,14 @@ use serde_json::{Map, Value}; use crate::{ - specification::{BorrowedReferenceSlots, Draft, OwnedObjectGate, OwnedScratchChild}, + draft::Draft, + spec::{ChildNode, ObjectInfo, ReferenceSlots}, Error, JsonPointerNode, Resolver, ResourceRef, Segments, }; -use super::subresources::{self, SubresourceIteratorInner}; +use super::draft202012::{self, SubresourceIteratorInner}; -pub(crate) fn owned_object_gate_map(schema: &Map) -> OwnedObjectGate<'_> { +pub(crate) fn object_info(schema: &Map) -> ObjectInfo<'_> { let mut raw_id = None; let mut ref_ = None; let mut schema_ref = None; @@ -55,7 +56,7 @@ pub(crate) fn owned_object_gate_map(schema: 
&Map) -> OwnedObjectG _ => None, }; - OwnedObjectGate { + ObjectInfo { id, has_anchor, ref_, @@ -67,7 +68,7 @@ pub(crate) fn owned_object_gate_map(schema: &Map) -> OwnedObjectG pub(crate) fn scan_borrowed_object_into_scratch_map<'a>( schema: &'a Map, draft: Draft, - references: &mut BorrowedReferenceSlots<'a>, + references: &mut ReferenceSlots<'a>, children: &mut Vec<(&'a Value, Draft)>, ) { for (key, value) in schema { @@ -144,8 +145,8 @@ pub(crate) fn scan_borrowed_object_into_scratch_map<'a>( pub(crate) fn scan_owned_object_into_scratch_map<'a>( schema: &'a Map, draft: Draft, - references: &mut BorrowedReferenceSlots<'a>, - children: &mut Vec>, + references: &mut ReferenceSlots<'a>, + children: &mut Vec>, ) -> (Option<&'a str>, bool) { let mut raw_id = None; let mut has_ref = false; @@ -165,11 +166,7 @@ pub(crate) fn scan_owned_object_into_scratch_map<'a>( } } "additionalItems" | "additionalProperties" if value.is_object() => { - children.push(OwnedScratchChild::key( - key.as_str(), - value, - draft.detect(value), - )); + children.push(ChildNode::key(key.as_str(), value, draft.detect(value))); } "contains" | "contentSchema" @@ -180,16 +177,12 @@ pub(crate) fn scan_owned_object_into_scratch_map<'a>( | "then" | "unevaluatedItems" | "unevaluatedProperties" => { - children.push(OwnedScratchChild::key( - key.as_str(), - value, - draft.detect(value), - )); + children.push(ChildNode::key(key.as_str(), value, draft.detect(value))); } "allOf" | "anyOf" | "oneOf" | "prefixItems" => { if let Some(arr) = value.as_array() { for (index, item) in arr.iter().enumerate() { - children.push(OwnedScratchChild::key_index( + children.push(ChildNode::key_index( key.as_str(), index, item, @@ -201,7 +194,7 @@ pub(crate) fn scan_owned_object_into_scratch_map<'a>( "$defs" | "definitions" | "dependentSchemas" | "patternProperties" | "properties" => { if let Some(obj) = value.as_object() { for (child_key, child_value) in obj { - children.push(OwnedScratchChild::key_key( + 
children.push(ChildNode::key_key( key.as_str(), child_key.as_str(), child_value, @@ -213,7 +206,7 @@ pub(crate) fn scan_owned_object_into_scratch_map<'a>( "items" => match value { Value::Array(arr) => { for (index, item) in arr.iter().enumerate() { - children.push(OwnedScratchChild::key_index( + children.push(ChildNode::key_index( "items", index, item, @@ -221,7 +214,7 @@ pub(crate) fn scan_owned_object_into_scratch_map<'a>( )); } } - _ => children.push(OwnedScratchChild::key("items", value, draft.detect(value))), + _ => children.push(ChildNode::key("items", value, draft.detect(value))), }, "dependencies" => { if let Some(obj) = value.as_object() { @@ -229,7 +222,7 @@ pub(crate) fn scan_owned_object_into_scratch_map<'a>( if !child_value.is_object() { continue; } - children.push(OwnedScratchChild::key_key( + children.push(ChildNode::key_key( key.as_str(), child_key.as_str(), child_value, @@ -461,7 +454,7 @@ pub(crate) fn maybe_in_subresource<'r>( "patternProperties", "properties", ]; - subresources::maybe_in_subresource_with_items_and_dependencies( + draft202012::maybe_in_subresource_with_items_and_dependencies( segments, resolver, subresource, @@ -472,11 +465,11 @@ pub(crate) fn maybe_in_subresource<'r>( #[cfg(test)] mod tests { - use crate::{specification::BorrowedReferenceSlots, Draft}; + use crate::{spec::ReferenceSlots, Draft}; use serde_json::json; #[test] - fn test_probe_borrowed_object_collects_control_keys() { + fn test_scan_object_collects_control_keys() { let schema = json!({ "id": "http://example.com/node", "$schema": "http://example.com/meta", @@ -485,7 +478,7 @@ mod tests { }, "items": {"type": "integer"} }); - let analysis = Draft::Draft4.probe_borrowed_object_map( + let analysis = Draft::Draft4.scan_object( schema .as_object() .expect("schema object should be analyzed"), @@ -506,7 +499,7 @@ mod tests { }, "items": {"type": "integer"} }); - let mut references = BorrowedReferenceSlots::default(); + let mut references = ReferenceSlots::default(); let 
mut children = Vec::new(); Draft::Draft4.scan_borrowed_object_into_scratch_map( diff --git a/crates/jsonschema-referencing/src/specification/draft6.rs b/crates/jsonschema-referencing/src/spec/draft6.rs similarity index 92% rename from crates/jsonschema-referencing/src/specification/draft6.rs rename to crates/jsonschema-referencing/src/spec/draft6.rs index 4f79ca12..eaf0af52 100644 --- a/crates/jsonschema-referencing/src/specification/draft6.rs +++ b/crates/jsonschema-referencing/src/spec/draft6.rs @@ -1,13 +1,14 @@ use serde_json::{Map, Value}; use crate::{ - specification::{BorrowedReferenceSlots, Draft, OwnedObjectGate, OwnedScratchChild}, + draft::Draft, + spec::{ChildNode, ObjectInfo, ReferenceSlots}, Error, JsonPointerNode, Resolver, ResourceRef, Segments, }; -use super::subresources::{self, SubresourceIteratorInner}; +use super::draft202012::{self, SubresourceIteratorInner}; -pub(crate) fn owned_object_gate_map(schema: &Map) -> OwnedObjectGate<'_> { +pub(crate) fn object_info(schema: &Map) -> ObjectInfo<'_> { let mut raw_id = None; let mut ref_ = None; let mut schema_ref = None; @@ -48,7 +49,7 @@ pub(crate) fn owned_object_gate_map(schema: &Map) -> OwnedObjectG _ => None, }; - OwnedObjectGate { + ObjectInfo { id, has_anchor, ref_, @@ -60,7 +61,7 @@ pub(crate) fn owned_object_gate_map(schema: &Map) -> OwnedObjectG pub(crate) fn scan_borrowed_object_into_scratch_map<'a>( schema: &'a Map, draft: Draft, - references: &mut BorrowedReferenceSlots<'a>, + references: &mut ReferenceSlots<'a>, children: &mut Vec<(&'a Value, Draft)>, ) { for (key, value) in schema { @@ -118,8 +119,8 @@ pub(crate) fn scan_borrowed_object_into_scratch_map<'a>( pub(crate) fn scan_owned_object_into_scratch_map<'a>( schema: &'a Map, draft: Draft, - references: &mut BorrowedReferenceSlots<'a>, - children: &mut Vec>, + references: &mut ReferenceSlots<'a>, + children: &mut Vec>, ) -> (Option<&'a str>, bool) { let mut raw_id = None; let mut has_ref = false; @@ -139,16 +140,12 @@ pub(crate) fn 
scan_owned_object_into_scratch_map<'a>( } } "additionalItems" | "additionalProperties" | "contains" | "not" | "propertyNames" => { - children.push(OwnedScratchChild::key( - key.as_str(), - value, - draft.detect(value), - )); + children.push(ChildNode::key(key.as_str(), value, draft.detect(value))); } "allOf" | "anyOf" | "oneOf" => { if let Some(arr) = value.as_array() { for (index, item) in arr.iter().enumerate() { - children.push(OwnedScratchChild::key_index( + children.push(ChildNode::key_index( key.as_str(), index, item, @@ -160,7 +157,7 @@ pub(crate) fn scan_owned_object_into_scratch_map<'a>( "definitions" | "patternProperties" | "properties" => { if let Some(obj) = value.as_object() { for (child_key, child_value) in obj { - children.push(OwnedScratchChild::key_key( + children.push(ChildNode::key_key( key.as_str(), child_key.as_str(), child_value, @@ -172,7 +169,7 @@ pub(crate) fn scan_owned_object_into_scratch_map<'a>( "items" => match value { Value::Array(arr) => { for (index, item) in arr.iter().enumerate() { - children.push(OwnedScratchChild::key_index( + children.push(ChildNode::key_index( "items", index, item, @@ -180,7 +177,7 @@ pub(crate) fn scan_owned_object_into_scratch_map<'a>( )); } } - _ => children.push(OwnedScratchChild::key("items", value, draft.detect(value))), + _ => children.push(ChildNode::key("items", value, draft.detect(value))), }, "dependencies" => { if let Some(obj) = value.as_object() { @@ -188,7 +185,7 @@ pub(crate) fn scan_owned_object_into_scratch_map<'a>( if !child_value.is_object() { continue; } - children.push(OwnedScratchChild::key_key( + children.push(ChildNode::key_key( key.as_str(), child_key.as_str(), child_value, @@ -379,7 +376,7 @@ pub(crate) fn maybe_in_subresource<'r>( "patternProperties", "properties", ]; - subresources::maybe_in_subresource_with_items_and_dependencies( + draft202012::maybe_in_subresource_with_items_and_dependencies( segments, resolver, subresource, diff --git 
a/crates/jsonschema-referencing/src/specification/draft7.rs b/crates/jsonschema-referencing/src/spec/draft7.rs similarity index 93% rename from crates/jsonschema-referencing/src/specification/draft7.rs rename to crates/jsonschema-referencing/src/spec/draft7.rs index 544d780d..ee9ea680 100644 --- a/crates/jsonschema-referencing/src/specification/draft7.rs +++ b/crates/jsonschema-referencing/src/spec/draft7.rs @@ -1,13 +1,14 @@ use serde_json::{Map, Value}; use crate::{ - specification::{BorrowedReferenceSlots, Draft, OwnedObjectGate, OwnedScratchChild}, + draft::Draft, + spec::{ChildNode, ObjectInfo, ReferenceSlots}, Error, JsonPointerNode, Resolver, ResourceRef, Segments, }; -use super::subresources::{self, SubresourceIteratorInner}; +use super::draft202012::{self, SubresourceIteratorInner}; -pub(crate) fn owned_object_gate_map(schema: &Map) -> OwnedObjectGate<'_> { +pub(crate) fn object_info(schema: &Map) -> ObjectInfo<'_> { let mut raw_id = None; let mut ref_ = None; let mut schema_ref = None; @@ -53,7 +54,7 @@ pub(crate) fn owned_object_gate_map(schema: &Map) -> OwnedObjectG _ => None, }; - OwnedObjectGate { + ObjectInfo { id, has_anchor, ref_, @@ -65,7 +66,7 @@ pub(crate) fn owned_object_gate_map(schema: &Map) -> OwnedObjectG pub(crate) fn scan_borrowed_object_into_scratch_map<'a>( schema: &'a Map, draft: Draft, - references: &mut BorrowedReferenceSlots<'a>, + references: &mut ReferenceSlots<'a>, children: &mut Vec<(&'a Value, Draft)>, ) { for (key, value) in schema { @@ -130,8 +131,8 @@ pub(crate) fn scan_borrowed_object_into_scratch_map<'a>( pub(crate) fn scan_owned_object_into_scratch_map<'a>( schema: &'a Map, draft: Draft, - references: &mut BorrowedReferenceSlots<'a>, - children: &mut Vec>, + references: &mut ReferenceSlots<'a>, + children: &mut Vec>, ) -> (Option<&'a str>, bool) { let mut raw_id = None; let mut has_ref = false; @@ -158,16 +159,12 @@ pub(crate) fn scan_owned_object_into_scratch_map<'a>( | "not" | "propertyNames" | "then" => { - 
children.push(OwnedScratchChild::key( - key.as_str(), - value, - draft.detect(value), - )); + children.push(ChildNode::key(key.as_str(), value, draft.detect(value))); } "allOf" | "anyOf" | "oneOf" => { if let Some(arr) = value.as_array() { for (index, item) in arr.iter().enumerate() { - children.push(OwnedScratchChild::key_index( + children.push(ChildNode::key_index( key.as_str(), index, item, @@ -179,7 +176,7 @@ pub(crate) fn scan_owned_object_into_scratch_map<'a>( "definitions" | "patternProperties" | "properties" => { if let Some(obj) = value.as_object() { for (child_key, child_value) in obj { - children.push(OwnedScratchChild::key_key( + children.push(ChildNode::key_key( key.as_str(), child_key.as_str(), child_value, @@ -191,7 +188,7 @@ pub(crate) fn scan_owned_object_into_scratch_map<'a>( "items" => match value { Value::Array(arr) => { for (index, item) in arr.iter().enumerate() { - children.push(OwnedScratchChild::key_index( + children.push(ChildNode::key_index( "items", index, item, @@ -199,7 +196,7 @@ pub(crate) fn scan_owned_object_into_scratch_map<'a>( )); } } - _ => children.push(OwnedScratchChild::key("items", value, draft.detect(value))), + _ => children.push(ChildNode::key("items", value, draft.detect(value))), }, "dependencies" => { if let Some(obj) = value.as_object() { @@ -207,7 +204,7 @@ pub(crate) fn scan_owned_object_into_scratch_map<'a>( if !child_value.is_object() { continue; } - children.push(OwnedScratchChild::key_key( + children.push(ChildNode::key_key( key.as_str(), child_key.as_str(), child_value, @@ -427,7 +424,7 @@ pub(crate) fn maybe_in_subresource<'r>( "patternProperties", "properties", ]; - subresources::maybe_in_subresource_with_items_and_dependencies( + draft202012::maybe_in_subresource_with_items_and_dependencies( segments, resolver, subresource, diff --git a/crates/jsonschema-referencing/src/specification/ids.rs b/crates/jsonschema-referencing/src/spec/ids.rs similarity index 100% rename from 
crates/jsonschema-referencing/src/specification/ids.rs rename to crates/jsonschema-referencing/src/spec/ids.rs diff --git a/crates/jsonschema-referencing/src/spec/mod.rs b/crates/jsonschema-referencing/src/spec/mod.rs new file mode 100644 index 00000000..1b2ce37b --- /dev/null +++ b/crates/jsonschema-referencing/src/spec/mod.rs @@ -0,0 +1,111 @@ +//! Types used by the schema traversal machinery during registry building. +//! +//! When the registry walks a schema document, each JSON object is scanned to extract +//! relevant information: +//! - [`ObjectScan`]: lightweight flags — whether the object has an `$id`, anchors, `$ref`/`$schema`. +//! - [`ObjectInfo`]: the full keyword values of `$id`, `$ref`, and `$schema`. +//! - [`ChildNode`]: a child to process next in the BFS queue, with its path and active draft. +//! +//! The sub-modules contain draft-specific scanning logic that produces these types. + +use serde_json::{Map, Value}; + +pub(crate) mod draft201909; +pub(crate) mod draft202012; +pub(crate) mod draft4; +pub(crate) mod draft6; +pub(crate) mod draft7; +pub(crate) mod ids; + +use crate::draft::Draft; + +/// A single path step to a child node: an object key or array index. +#[derive(Copy, Clone)] +pub(crate) enum PathSegment<'a> { + Key(&'a str), + Index(usize), +} + +/// Lightweight scan result for a JSON object: flags for id, anchors, and $ref/$schema. +pub(crate) struct ObjectScan<'a> { + pub(crate) id: Option<&'a str>, + pub(crate) has_anchor: bool, + pub(crate) has_ref_or_schema: bool, +} + +/// Detailed keyword values extracted from a JSON object during schema processing. +pub(crate) struct ObjectInfo<'a> { + pub(crate) id: Option<&'a str>, + pub(crate) has_anchor: bool, + pub(crate) ref_: Option<&'a str>, + pub(crate) schema: Option<&'a str>, + pub(crate) has_children: bool, +} + +/// A child node queued for the BFS traversal of a schema document. 
+#[derive(Copy, Clone)] +pub(crate) struct ChildNode<'a> { + pub(crate) first: PathSegment<'a>, + pub(crate) second: Option>, + pub(crate) value: &'a Value, + pub(crate) draft: Draft, +} + +impl<'a> ChildNode<'a> { + #[inline] + pub(crate) fn key(key: &'a str, value: &'a Value, draft: Draft) -> Self { + Self { + first: PathSegment::Key(key), + second: None, + value, + draft, + } + } + + #[inline] + pub(crate) fn key_index(key: &'a str, index: usize, value: &'a Value, draft: Draft) -> Self { + Self { + first: PathSegment::Key(key), + second: Some(PathSegment::Index(index)), + value, + draft, + } + } + + #[inline] + pub(crate) fn key_key( + key: &'a str, + child_key: &'a str, + value: &'a Value, + draft: Draft, + ) -> Self { + Self { + first: PathSegment::Key(key), + second: Some(PathSegment::Key(child_key)), + value, + draft, + } + } +} + +/// Captured `$ref` and `$schema` string values for a schema object being processed. +#[derive(Default)] +pub(crate) struct ReferenceSlots<'a> { + pub(crate) ref_: Option<&'a str>, + pub(crate) schema: Option<&'a str>, +} + +#[inline] +pub(crate) fn has_ref_or_schema(schema: &Map) -> bool { + if schema.len() <= 3 { + for (key, value) in schema { + if (key == "$ref" || key == "$schema") && value.is_string() { + return true; + } + } + false + } else { + schema.get("$ref").and_then(Value::as_str).is_some() + || schema.get("$schema").and_then(Value::as_str).is_some() + } +} diff --git a/crates/jsonschema/src/compiler.rs b/crates/jsonschema/src/compiler.rs index 8d7667cf..07fc6a30 100644 --- a/crates/jsonschema/src/compiler.rs +++ b/crates/jsonschema/src/compiler.rs @@ -378,13 +378,13 @@ impl<'a> Context<'a> { ) -> Result { let uri = self .resolver - .resolve_against(&self.resolver.base_uri().borrow(), reference)?; + .resolve_uri(&self.resolver.base_uri().borrow(), reference)?; Ok(self.shared.seen.borrow().contains(&*uri)) } pub(crate) fn mark_seen(&self, reference: &str) -> Result<(), referencing::Error> { let uri = self .resolver - 
.resolve_against(&self.resolver.base_uri().borrow(), reference)?; + .resolve_uri(&self.resolver.base_uri().borrow(), reference)?; self.shared.seen.borrow_mut().insert(uri); Ok(()) } @@ -402,7 +402,7 @@ impl<'a> Context<'a> { } let result = self .resolver - .resolve_against(&self.resolver.base_uri().borrow(), &buffer); + .resolve_uri(&self.resolver.base_uri().borrow(), &buffer); buffer.clear(); result } @@ -412,7 +412,7 @@ impl<'a> Context<'a> { reference: &str, ) -> Result>, referencing::Error> { self.resolver - .resolve_against(&self.resolver.base_uri().borrow(), reference) + .resolve_uri(&self.resolver.base_uri().borrow(), reference) } pub(crate) fn cached_location_node(&self, key: &LocationCacheKey) -> Option { @@ -827,7 +827,7 @@ pub(crate) fn normalized_base_uri_for_generated_registry( registry: &referencing::Registry<'_>, base_uri: &referencing::Uri, ) -> referencing::Uri { - if registry.contains_resource_uri(base_uri.as_str()) { + if registry.contains_resource(base_uri.as_str()) { return base_uri.clone(); } @@ -837,7 +837,7 @@ pub(crate) fn normalized_base_uri_for_generated_registry( { let mut normalized = base_uri.clone(); normalized.set_fragment(None); - if registry.contains_resource_uri(normalized.as_str()) { + if registry.contains_resource(normalized.as_str()) { return normalized; } } From 404d46c871ca9cdd05fd8843d8f199947dca4e69 Mon Sep 17 00:00:00 2001 From: Dmitry Dygalo Date: Tue, 7 Apr 2026 14:34:04 +0200 Subject: [PATCH 08/14] wip Signed-off-by: Dmitry Dygalo --- crates/jsonschema-referencing/Cargo.toml | 2 - crates/jsonschema-referencing/src/lib.rs | 9 ---- crates/jsonschema-referencing/src/pointer.rs | 4 -- .../src/registry/build.rs | 42 ------------------- .../jsonschema-referencing/src/spec/draft4.rs | 40 ++++++------------ crates/jsonschema-referencing/tests/suite | 2 +- crates/jsonschema/tests/suite | 2 +- 7 files changed, 14 insertions(+), 87 deletions(-) diff --git a/crates/jsonschema-referencing/Cargo.toml 
b/crates/jsonschema-referencing/Cargo.toml index 8dab6ec3..e1769f5e 100644 --- a/crates/jsonschema-referencing/Cargo.toml +++ b/crates/jsonschema-referencing/Cargo.toml @@ -12,7 +12,6 @@ license.workspace = true [features] default = [] retrieve-async = ["dep:async-trait", "dep:futures"] -perf-observe-registry = [] [target.'cfg(target_arch = "wasm32")'.dependencies] getrandom = { version = "0.3.4", features = ["wasm_js"] } @@ -29,7 +28,6 @@ percent-encoding = "2.3.1" serde_json.workspace = true hashbrown = "0.16" micromap = "0.3.0" -bytecount = { version = "0.6", features = ["runtime-dispatch-simd"] } [dev-dependencies] benchmark = { path = "../benchmark/" } diff --git a/crates/jsonschema-referencing/src/lib.rs b/crates/jsonschema-referencing/src/lib.rs index 8d1f975a..ee58a7fd 100644 --- a/crates/jsonschema-referencing/src/lib.rs +++ b/crates/jsonschema-referencing/src/lib.rs @@ -1,15 +1,6 @@ //! # referencing //! //! An implementation-agnostic JSON reference resolution library for Rust. -#[macro_export] -macro_rules! observe_registry { - ($($arg:tt)*) => {{ - #[cfg(feature = "perf-observe-registry")] - { - println!($($arg)*); - } - }}; -} mod anchor; mod cache; diff --git a/crates/jsonschema-referencing/src/pointer.rs b/crates/jsonschema-referencing/src/pointer.rs index f46e83ca..2bcc7682 100644 --- a/crates/jsonschema-referencing/src/pointer.rs +++ b/crates/jsonschema-referencing/src/pointer.rs @@ -74,10 +74,6 @@ pub(crate) enum ParsedPointerSegment { /// /// **NOTE**: A slightly faster version of pointer resolution based on `Value::pointer` from `serde_json`. 
pub fn pointer<'a>(document: &'a Value, pointer: &str) -> Option<&'a Value> { - crate::observe_registry!( - "registry.pointer_segments={}", - bytecount::count(pointer.as_bytes(), b'/') - ); if pointer.is_empty() { return Some(document); } diff --git a/crates/jsonschema-referencing/src/registry/build.rs b/crates/jsonschema-referencing/src/registry/build.rs index 30a6b1ed..0ff4e1d1 100644 --- a/crates/jsonschema-referencing/src/registry/build.rs +++ b/crates/jsonschema-referencing/src/registry/build.rs @@ -491,22 +491,8 @@ fn explore_borrowed_subtree<'r>( local_seen: &mut LocalSeen<'r>, ) -> Result<(), Error> { let object = subschema.as_object(); - #[cfg(feature = "perf-observe-registry")] - if let Some(object) = object { - crate::observe_registry!("registry.borrowed.object_len={}", object.len()); - } let scan = object.map(|schema| draft.scan_object(schema)); if let Some(scan) = scan.as_ref() { - #[cfg(feature = "perf-observe-registry")] - { - let id_scan = match (scan.id.is_some(), scan.has_anchor) { - (false, false) => "none", - (true, false) => "id_only", - (false, true) => "anchor_only", - (true, true) => "id_and_anchor", - }; - crate::observe_registry!("registry.id_scan={id_scan}"); - } if let Some(id) = scan.id { let original_base_uri = Arc::clone(&current_base_uri); current_base_uri = resolve_id(&current_base_uri, id, resolution_cache)?; @@ -776,10 +762,6 @@ fn explore_owned_subtree<'a, 'r>( local_seen: &mut LocalSeen<'a>, ) -> Result<(), Error> { let object = subschema.as_object(); - #[cfg(feature = "perf-observe-registry")] - if let Some(object) = object { - crate::observe_registry!("registry.owned.object_len={}", object.len()); - } let child_start = owned_child_scratch.len(); let info = object.map(|schema| draft.object_info(schema)); let (id, has_anchors) = info @@ -819,17 +801,6 @@ fn explore_owned_subtree<'a, 'r>( if let (Some(schema), Some(info)) = (object, info.as_ref()) { if info.ref_.is_some() || info.schema.is_some() { - #[cfg(feature = "perf-observe-registry")]
- { - let kind = if schema.len() == 1 { - "ref_only_leaf" - } else if info.has_children { - "ref_with_children" - } else { - "ref_no_children" - }; - crate::observe_registry!("registry.owned.gate={kind}"); - } if schema.len() == 1 { owned_reference_scratch.ref_ = info.ref_; owned_reference_scratch.schema = info.schema; @@ -1375,7 +1346,6 @@ fn collect_external_resources_from_slots<'doc>( continue; } if reference.starts_with('#') { - crate::observe_registry!("registry.local_ref={}", reference); if mark_local_reference(local_seen, base, reference) { let ptr = reference.trim_start_matches('#'); if let Some(referenced) = pointer(root, ptr) { @@ -1393,11 +1363,6 @@ fn collect_external_resources_from_slots<'doc>( continue; } if mark_reference(seen, base, reference) { - if key == "$schema" { - crate::observe_registry!("registry.schema_ref={}", reference); - } else { - crate::observe_registry!("registry.external_ref={}", reference); - } let resolved = if base.has_fragment() { let mut base_without_fragment = base.as_ref().clone(); base_without_fragment.set_fragment(None); @@ -1485,7 +1450,6 @@ fn collect_external_resources<'doc>( } } else if $reference != "#" { if $reference.starts_with('#') { - crate::observe_registry!("registry.local_ref={}", $reference); if mark_local_reference(local_seen, base, $reference) { let ptr = $reference.trim_start_matches('#'); if let Some(referenced) = pointer(root, ptr) { @@ -1501,11 +1465,6 @@ fn collect_external_resources<'doc>( } } } else if mark_reference(seen, base, $reference) { - if $key == "$schema" { - crate::observe_registry!("registry.schema_ref={}", $reference); - } else { - crate::observe_registry!("registry.external_ref={}", $reference); - } let resolved = if base.has_fragment() { let mut base_without_fragment = base.as_ref().clone(); base_without_fragment.set_fragment(None); @@ -1545,7 +1504,6 @@ fn collect_external_resources<'doc>( } if let Some(object) = contents.as_object() { - 
crate::observe_registry!("registry.ref_scan.object_len={}", object.len()); if object.len() < 3 { for (key, value) in object { if key == "$ref" { diff --git a/crates/jsonschema-referencing/src/spec/draft4.rs b/crates/jsonschema-referencing/src/spec/draft4.rs index 7fd3227b..1e1ff784 100644 --- a/crates/jsonschema-referencing/src/spec/draft4.rs +++ b/crates/jsonschema-referencing/src/spec/draft4.rs @@ -84,7 +84,6 @@ pub(crate) fn scan_borrowed_object_into_scratch_map<'a>( } } "additionalItems" | "additionalProperties" if value.is_object() => { - crate::observe_registry!("registry.draft4.keyword={}", key); children.push((value, draft.detect(value))); } "contains" @@ -96,11 +95,9 @@ pub(crate) fn scan_borrowed_object_into_scratch_map<'a>( | "then" | "unevaluatedItems" | "unevaluatedProperties" => { - crate::observe_registry!("registry.draft4.keyword={}", key); children.push((value, draft.detect(value))); } "allOf" | "anyOf" | "oneOf" | "prefixItems" => { - crate::observe_registry!("registry.draft4.keyword={}", key); if let Some(arr) = value.as_array() { for item in arr { children.push((item, draft.detect(item))); @@ -108,26 +105,21 @@ pub(crate) fn scan_borrowed_object_into_scratch_map<'a>( } } "$defs" | "definitions" | "dependentSchemas" | "patternProperties" | "properties" => { - crate::observe_registry!("registry.draft4.keyword={}", key); if let Some(obj) = value.as_object() { for child_value in obj.values() { children.push((child_value, draft.detect(child_value))); } } } - "items" => { - crate::observe_registry!("registry.draft4.keyword=items"); - match value { - Value::Array(arr) => { - for item in arr { - children.push((item, draft.detect(item))); - } + "items" => match value { + Value::Array(arr) => { + for item in arr { + children.push((item, draft.detect(item))); } - _ => children.push((value, draft.detect(value))), } - } + _ => children.push((value, draft.detect(value))), + }, "dependencies" => { - 
crate::observe_registry!("registry.draft4.keyword=dependencies"); if let Some(obj) = value.as_object() { for child_value in obj.values() { if !child_value.is_object() { @@ -254,7 +246,6 @@ where for (key, value) in schema { match key.as_str() { "additionalItems" | "additionalProperties" if value.is_object() => { - crate::observe_registry!("registry.draft4.keyword={}", key); f(value, draft.detect(value))?; } "contains" @@ -266,11 +257,9 @@ where | "then" | "unevaluatedItems" | "unevaluatedProperties" => { - crate::observe_registry!("registry.draft4.keyword={}", key); f(value, draft.detect(value))?; } "allOf" | "anyOf" | "oneOf" | "prefixItems" => { - crate::observe_registry!("registry.draft4.keyword={}", key); if let Some(arr) = value.as_array() { for item in arr { f(item, draft.detect(item))?; @@ -278,26 +267,21 @@ where } } "$defs" | "definitions" | "dependentSchemas" | "patternProperties" | "properties" => { - crate::observe_registry!("registry.draft4.keyword={}", key); if let Some(obj) = value.as_object() { for child_value in obj.values() { f(child_value, draft.detect(child_value))?; } } } - "items" => { - crate::observe_registry!("registry.draft4.keyword=items"); - match value { - Value::Array(arr) => { - for item in arr { - f(item, draft.detect(item))?; - } + "items" => match value { + Value::Array(arr) => { + for item in arr { + f(item, draft.detect(item))?; } - _ => f(value, draft.detect(value))?, } - } + _ => f(value, draft.detect(value))?, + }, "dependencies" => { - crate::observe_registry!("registry.draft4.keyword=dependencies"); if let Some(obj) = value.as_object() { for child_value in obj.values() { if !child_value.is_object() { diff --git a/crates/jsonschema-referencing/tests/suite b/crates/jsonschema-referencing/tests/suite index de5bc6f7..b062c384 160000 --- a/crates/jsonschema-referencing/tests/suite +++ b/crates/jsonschema-referencing/tests/suite @@ -1 +1 @@ -Subproject commit de5bc6f7ff9fda78f35bd6f6276b3bcd3a3f8c3f +Subproject commit 
b062c3849e41b7a173d0dd64081811effb0c19ab diff --git a/crates/jsonschema/tests/suite b/crates/jsonschema/tests/suite index e819f329..54ed4d1f 160000 --- a/crates/jsonschema/tests/suite +++ b/crates/jsonschema/tests/suite @@ -1 +1 @@ -Subproject commit e819f329268130e0ed5bbc87b071c83d8e02a68a +Subproject commit 54ed4d1fb0ac8ab16c453df5db8b5e81689c6fed From 177b28002f43c4216f7397f0e02e1b05d70fc42e Mon Sep 17 00:00:00 2001 From: Dmitry Dygalo Date: Tue, 7 Apr 2026 19:29:46 +0200 Subject: [PATCH 09/14] wip Signed-off-by: Dmitry Dygalo --- .../src/registry/build.rs | 1237 +++++++---------- .../src/registry/index.rs | 104 +- .../src/registry/input.rs | 18 +- .../src/registry/mod.rs | 228 +-- .../src/spec/draft201909.rs | 307 ++-- .../src/spec/draft202012.rs | 195 +-- .../jsonschema-referencing/src/spec/draft4.rs | 315 ++--- .../jsonschema-referencing/src/spec/draft6.rs | 260 ++-- .../jsonschema-referencing/src/spec/draft7.rs | 293 ++-- 9 files changed, 1147 insertions(+), 1810 deletions(-) diff --git a/crates/jsonschema-referencing/src/registry/build.rs b/crates/jsonschema-referencing/src/registry/build.rs index 0ff4e1d1..286f1752 100644 --- a/crates/jsonschema-referencing/src/registry/build.rs +++ b/crates/jsonschema-referencing/src/registry/build.rs @@ -1,7 +1,7 @@ //! BFS pipeline that processes pending resources into the prepared index. //! //! Entry points: -//! - [`process_resources_mixed`]: processes owned and borrowed resources together. +//! - [`index_resources`]: processes pending resources and returns a prepared index. //! - [`build_prepared_index_for_documents`]: builds an index from pre-stored documents //! (used by the static [`super::SPECIFICATIONS`] registry). //! 
@@ -18,13 +18,11 @@ use crate::{ cache::UriCache, meta::metas_for_draft, pointer::{pointer, ParsedPointer, ParsedPointerSegment}, - uri, Anchor, Draft, Error, JsonPointerNode, ResourceRef, Retrieve, + spec::{ChildNode, PathSegment, ReferenceSlots}, + uri, Draft, Error, JsonPointerNode, Retrieve, }; -use super::{ - index::{IndexedAnchor, IndexedAnchorKind, IndexedResource, PreparedIndex}, - input::PendingResource, -}; +use super::{index::Index, input::PendingResource}; /// A schema document stored in the registry, either borrowed from the caller or owned. #[derive(Debug)] @@ -56,7 +54,7 @@ impl<'a> StoredDocument<'a> { } #[inline] - fn borrowed_contents(&self) -> Option<&'a Value> { + pub(super) fn borrowed_contents(&self) -> Option<&'a Value> { match &self.value { Cow::Borrowed(value) => Some(value), Cow::Owned(_) => None, @@ -70,27 +68,101 @@ impl<'a> StoredDocument<'a> { } pub(super) type DocumentStore<'a> = AHashMap>, Arc>>; +pub(super) type KnownResources = AHashSet>; +type ReferenceTracker = AHashSet; +type VisitedRefs<'a> = AHashSet<(NonZeroUsize, &'a str)>; +/// An entry in the processing queue. +/// `(base_uri, document_root_uri, pointer, draft)` +/// +/// `pointer` is a JSON Pointer relative to the document root (`""` means root). +/// Local `$ref`s are always resolved against the document root. +type QueueEntry = (Arc>, Arc>, String, Draft); + +/// A deferred local `$ref` target. +/// +/// Like [`QueueEntry`] but carries the pre-resolved value address (`value_addr`) obtained +/// for free during the `pointer()` call at push time. Used in [`process_deferred_refs`] to +/// skip already-visited targets without a second `pointer()` traversal. 
+/// +/// `(base_uri, document_root_uri, pointer, draft, value_addr)` +type DeferredRef = (Arc>, Arc>, String, Draft, usize); + +pub(super) fn index_resources<'a>( + pairs: impl IntoIterator, PendingResource<'a>)>, + retriever: &dyn Retrieve, + documents: &mut DocumentStore<'a>, + known_resources: &mut KnownResources, + resolution_cache: &mut UriCache, + draft_override: Option, +) -> Result<(Vec, Index<'a>), Error> { + let mut state = ProcessingState::new(); + ingest_input_resources( + pairs, + documents, + known_resources, + &mut state, + draft_override, + ); + resolve_and_index( + &mut state, + documents, + known_resources, + resolution_cache, + draft_override.unwrap_or_default(), + retriever, + )?; + Ok((state.custom_metaschemas, state.index)) +} + +#[cfg(feature = "retrieve-async")] +pub(super) async fn index_resources_async<'a>( + pairs: impl IntoIterator, PendingResource<'a>)>, + retriever: &dyn crate::AsyncRetrieve, + documents: &mut DocumentStore<'a>, + known_resources: &mut KnownResources, + resolution_cache: &mut UriCache, + draft_override: Option, +) -> Result<(Vec, Index<'a>), Error> { + let mut state = ProcessingState::new(); + ingest_input_resources( + pairs, + documents, + known_resources, + &mut state, + draft_override, + ); + resolve_and_index_async( + &mut state, + documents, + known_resources, + resolution_cache, + draft_override.unwrap_or_default(), + retriever, + ) + .await?; + Ok((state.custom_metaschemas, state.index)) +} + /// Build prepared local index data for all documents already in `documents`. -/// Used by `build_from_meta_schemas` for the static SPECIFICATIONS registry. 
pub(super) fn build_prepared_index_for_documents<'a>( documents: &DocumentStore<'a>, resolution_cache: &mut UriCache, -) -> Result, Error> { +) -> Result, Error> { let mut state = ProcessingState::new(); let mut known_resources = KnownResources::default(); for (doc_uri, document) in documents { known_resources.insert((**doc_uri).clone()); - insert_root_index_entries(&mut state.index_data, doc_uri, document); + state.index.register_document(doc_uri, document); } for (doc_uri, document) in documents { - if document.borrowed_contents().is_some() { - let mut local_seen = LocalSeen::new(); + if let Some(document_root) = document.borrowed_contents() { + let mut local_seen = VisitedRefs::new(); process_borrowed_document( Arc::clone(doc_uri), doc_uri, - document, + document_root, "", document.draft(), &mut state, @@ -99,9 +171,8 @@ pub(super) fn build_prepared_index_for_documents<'a>( &mut local_seen, )?; } else { - let mut local_seen = LocalSeen::new(); - let mut owned_reference_scratch = crate::spec::ReferenceSlots::default(); - let mut owned_child_scratch: Vec> = Vec::new(); + let mut local_seen = VisitedRefs::new(); + let mut owned_child_scratch = Vec::new(); process_owned_document( Arc::clone(doc_uri), doc_uri, @@ -111,15 +182,13 @@ pub(super) fn build_prepared_index_for_documents<'a>( &mut state, &mut known_resources, resolution_cache, - &mut owned_reference_scratch, &mut owned_child_scratch, &mut local_seen, )?; } } - Ok(state.index_data) + Ok(state.index) } -pub(super) type KnownResources = AHashSet>; #[derive(Hash, Eq, PartialEq)] struct ReferenceKey { @@ -137,12 +206,7 @@ impl ReferenceKey { } } -type ReferenceTracker = AHashSet; - -/// Allocation-free local-ref deduplication: stores (`base_arc_ptr`, &`str_borrowed_from_json`). 
-type LocalSeen<'a> = AHashSet<(NonZeroUsize, &'a str)>; - -/// Clears a [`LocalSeen`] set and reinterprets it with a different borrow lifetime, +/// Clears a [`VisitedLocalRefs`] set and reinterprets it with a different borrow lifetime, /// reusing the backing heap allocation across processing phases. /// /// # Safety @@ -154,7 +218,7 @@ type LocalSeen<'a> = AHashSet<(NonZeroUsize, &'a str)>; /// - Verified under MIRI (tree borrows): no undefined behaviour detected. #[allow(unsafe_code)] #[inline] -unsafe fn reuse_local_seen<'b>(mut s: LocalSeen<'_>) -> LocalSeen<'b> { +unsafe fn reuse_visited_local_refs<'b>(mut s: VisitedRefs<'_>) -> VisitedRefs<'b> { s.clear(); // SAFETY: see above — layouts identical, no live 'a refs after clear() std::mem::transmute(s) @@ -166,134 +230,6 @@ enum ReferenceKind { Schema, } -/// An entry in the processing queue. -/// `(base_uri, document_root_uri, pointer, draft)` -/// -/// `pointer` is a JSON Pointer relative to the document root (`""` means root). -/// Local `$ref`s are always resolved against the document root. -type QueueEntry = (Arc>, Arc>, String, Draft); - -/// A deferred local `$ref` target. -/// -/// Like [`QueueEntry`] but carries the pre-resolved value address (`value_addr`) obtained -/// for free during the `pointer()` call at push time. Used in [`process_deferred_refs`] to -/// skip already-visited targets without a second `pointer()` traversal. 
-/// -/// `(base_uri, document_root_uri, pointer, draft, value_addr)` -type DeferredRef = (Arc>, Arc>, String, Draft, usize); - -fn insert_borrowed_anchor_entries<'a>( - index_data: &mut PreparedIndex<'a>, - uri: &Arc>, - draft: Draft, - contents: &'a Value, -) { - let anchors = index_data.anchors.get_or_insert_default(Arc::clone(uri)); - for anchor in draft.anchors(contents) { - anchors.insert( - anchor.name().to_string().into_boxed_str(), - IndexedAnchor::Borrowed(anchor), - ); - } -} - -fn insert_owned_anchor_entries<'a>( - index_data: &mut PreparedIndex<'a>, - uri: &Arc>, - document: &Arc>, - pointer: &ParsedPointer, - draft: Draft, - contents: &Value, -) { - let anchors = index_data.anchors.get_or_insert_default(Arc::clone(uri)); - for anchor in draft.anchors(contents) { - let (name, kind) = match anchor { - Anchor::Default { name, .. } => (name, IndexedAnchorKind::Default), - Anchor::Dynamic { name, .. } => (name, IndexedAnchorKind::Dynamic), - }; - anchors.insert( - name.to_string().into_boxed_str(), - IndexedAnchor::Owned { - document: Arc::clone(document), - pointer: pointer.clone(), - draft, - kind, - name: name.to_string().into_boxed_str(), - }, - ); - } -} - -fn insert_root_index_entries<'a>( - index_data: &mut PreparedIndex<'a>, - doc_key: &Arc>, - document: &Arc>, -) { - if let Some(contents) = document.borrowed_contents() { - index_data.resources.insert( - Arc::clone(doc_key), - IndexedResource::Borrowed(ResourceRef::new(contents, document.draft())), - ); - insert_borrowed_anchor_entries(index_data, doc_key, document.draft(), contents); - } else { - let pointer = ParsedPointer::default(); - index_data.resources.insert( - Arc::clone(doc_key), - IndexedResource::Owned { - document: Arc::clone(document), - pointer: pointer.clone(), - draft: document.draft(), - }, - ); - insert_owned_anchor_entries( - index_data, - doc_key, - document, - &pointer, - document.draft(), - document.contents(), - ); - } -} - -fn insert_borrowed_discovered_index_entries<'a>( - 
index_data: &mut PreparedIndex<'a>, - uri: &Arc>, - draft: Draft, - has_id: bool, - contents: &'a Value, -) { - if has_id { - index_data.resources.insert( - Arc::clone(uri), - IndexedResource::Borrowed(ResourceRef::new(contents, draft)), - ); - } - insert_borrowed_anchor_entries(index_data, uri, draft, contents); -} - -fn insert_owned_discovered_index_entries<'a>( - index_data: &mut PreparedIndex<'a>, - uri: &Arc>, - document: &Arc>, - pointer: &ParsedPointer, - draft: Draft, - has_id: bool, - contents: &Value, -) { - if has_id { - index_data.resources.insert( - Arc::clone(uri), - IndexedResource::Owned { - document: Arc::clone(document), - pointer: pointer.clone(), - draft, - }, - ); - } - insert_owned_anchor_entries(index_data, uri, document, pointer, draft, contents); -} - struct ProcessingState<'a> { queue: VecDeque, seen: ReferenceTracker, @@ -313,9 +249,10 @@ struct ProcessingState<'a> { /// pre-stored value address; non-subresource paths (e.g. `#/components/schemas/Foo`) /// are still fully traversed. deferred_refs: Vec, - borrowed_reference_scratch: crate::spec::ReferenceSlots<'a>, + /// Reused scratch for `$ref`/`$schema` slot capture during both borrowed and owned traversal. + reference_scratch: ReferenceSlots<'a>, borrowed_child_scratch: Vec<(&'a Value, Draft)>, - index_data: PreparedIndex<'a>, + index: Index<'a>, } impl ProcessingState<'_> { @@ -329,65 +266,62 @@ impl ProcessingState<'_> { custom_metaschemas: Vec::new(), visited_schemas: AHashSet::new(), deferred_refs: Vec::new(), - borrowed_reference_scratch: crate::spec::ReferenceSlots::default(), + reference_scratch: ReferenceSlots::default(), borrowed_child_scratch: Vec::new(), - index_data: PreparedIndex::default(), + index: Index::default(), } } } -fn process_input_resources_mixed<'a>( + +/// Convert resources into stored documents, register them with the +/// index, and enqueue them as the starting set for BFS traversal. 
+/// +/// `draft` forces a specific draft for all resources; `None` means auto-detect per resource. +/// Resources are added to `known_resources` here so the retriever does not re-fetch them +/// during the BFS loop. +fn ingest_input_resources<'a>( pairs: impl IntoIterator, PendingResource<'a>)>, documents: &mut DocumentStore<'a>, known_resources: &mut KnownResources, state: &mut ProcessingState<'a>, - draft_override: Option, + draft: Option, ) { for (uri, resource) in pairs { let key = Arc::new(uri); - let draft = match &resource { - PendingResource::OwnedValue(value) => { - draft_override.unwrap_or_else(|| Draft::default().detect(value)) - } - PendingResource::BorrowedValue(value) => { - draft_override.unwrap_or_else(|| Draft::default().detect(value)) - } - PendingResource::OwnedResource(resource) => resource.draft(), - PendingResource::BorrowedResource(resource) => resource.draft(), - }; - - let r = Arc::new(match resource { - PendingResource::OwnedValue(value) => { - let (draft, contents) = draft.create_resource(value).into_inner(); - StoredDocument::owned(contents, draft) + let (draft, document) = match resource { + PendingResource::Value(value) => { + let draft = draft.unwrap_or_else(|| Draft::default().detect(&value)); + (draft, StoredDocument::owned(value, draft)) } - PendingResource::BorrowedValue(value) => { - let resource = draft.create_resource_ref(value); - StoredDocument::borrowed(resource.contents(), resource.draft()) + PendingResource::ValueRef(value) => { + let draft = draft.unwrap_or_else(|| Draft::default().detect(value)); + (draft, StoredDocument::borrowed(value, draft)) } - PendingResource::OwnedResource(resource) => { + PendingResource::Resource(resource) => { let (draft, contents) = resource.into_inner(); - StoredDocument::owned(contents, draft) + (draft, StoredDocument::owned(contents, draft)) } - PendingResource::BorrowedResource(resource) => { - StoredDocument::borrowed(resource.contents(), resource.draft()) + 
PendingResource::ResourceRef(resource) => { + let draft = resource.draft(); + (draft, StoredDocument::borrowed(resource.contents(), draft)) } - }); + }; + let document = Arc::new(document); - documents.insert(Arc::clone(&key), Arc::clone(&r)); + documents.insert(Arc::clone(&key), Arc::clone(&document)); known_resources.insert((*key).clone()); - insert_root_index_entries(&mut state.index_data, &key, &r); + state.index.register_document(&key, &document); + // Draft::Unknown means the resource declared a custom $schema; collect its URI + // for post-build validation that a matching meta-schema was registered. if draft == Draft::Unknown { - let contents = documents - .get(&key) - .expect("document was just inserted") - .contents(); - if let Some(meta_schema) = contents + if let Some(meta) = document + .contents() .as_object() .and_then(|obj| obj.get("$schema")) .and_then(|schema| schema.as_str()) { - state.custom_metaschemas.push(meta_schema.to_string()); + state.custom_metaschemas.push(meta.to_string()); } } @@ -403,40 +337,38 @@ fn process_queue<'r>( known_resources: &mut KnownResources, resolution_cache: &mut UriCache, ) -> Result<(), Error> { - while let Some((base, document_root_uri, pointer_path, draft)) = state.queue.pop_front() { - let Some(document) = documents.get(&document_root_uri) else { + while let Some((base, root_uri, pointer_path, draft)) = state.queue.pop_front() { + let Some(document) = documents.get(&root_uri) else { continue; }; - if document.borrowed_contents().is_some() { - let mut document_local_seen = LocalSeen::new(); + if let Some(document_root) = document.borrowed_contents() { + let mut visited = VisitedRefs::new(); process_borrowed_document( base, - &document_root_uri, - document, + &root_uri, + document_root, &pointer_path, draft, state, known_resources, resolution_cache, - &mut document_local_seen, + &mut visited, )?; continue; } - let mut document_local_seen = LocalSeen::new(); - let mut owned_reference_scratch = 
crate::spec::ReferenceSlots::default(); + let mut visited = VisitedRefs::new(); let mut owned_child_scratch = Vec::new(); process_owned_document( base, - &document_root_uri, + &root_uri, document, &pointer_path, draft, state, known_resources, resolution_cache, - &mut owned_reference_scratch, &mut owned_child_scratch, - &mut document_local_seen, + &mut visited, )?; } Ok(()) @@ -445,17 +377,14 @@ fn process_queue<'r>( fn process_borrowed_document<'r>( current_base_uri: Arc>, document_root_uri: &Arc>, - document: &Arc>, + document_root: &'r Value, pointer_path: &str, draft: Draft, state: &mut ProcessingState<'r>, known_resources: &mut KnownResources, resolution_cache: &mut UriCache, - local_seen: &mut LocalSeen<'r>, + visited: &mut VisitedRefs<'r>, ) -> Result<(), Error> { - let Some(document_root) = document.borrowed_contents() else { - return Ok(()); - }; let Some(subschema) = (if pointer_path.is_empty() { Some(document_root) } else { @@ -474,7 +403,7 @@ fn process_borrowed_document<'r>( state, known_resources, resolution_cache, - local_seen, + visited, ) } @@ -488,157 +417,84 @@ fn explore_borrowed_subtree<'r>( state: &mut ProcessingState<'r>, known_resources: &mut KnownResources, resolution_cache: &mut UriCache, - local_seen: &mut LocalSeen<'r>, + local_seen: &mut VisitedRefs<'r>, ) -> Result<(), Error> { - let object = subschema.as_object(); - let scan = object.map(|schema| draft.scan_object(schema)); - if let Some(scan) = scan.as_ref() { - if let Some(id) = scan.id { - let original_base_uri = Arc::clone(¤t_base_uri); - current_base_uri = resolve_id(¤t_base_uri, id, resolution_cache)?; - known_resources.insert((*current_base_uri).clone()); - let insert_resource = current_base_uri != original_base_uri; - if !(is_root_entry && current_base_uri == *document_root_uri) { - insert_borrowed_discovered_index_entries( - &mut state.index_data, - ¤t_base_uri, - draft, - insert_resource, - subschema, - ); - } - } else if scan.has_anchor && !is_root_entry { - 
insert_borrowed_discovered_index_entries( - &mut state.index_data, + let Some(object) = subschema.as_object() else { + return Ok(()); + }; + let scan = draft.scan_object(object); + + if let Some(id) = scan.id { + let (new_base, insert_resource) = + resolve_subresource_id(¤t_base_uri, id, known_resources, resolution_cache)?; + current_base_uri = new_base; + if !(is_root_entry && current_base_uri == *document_root_uri) { + state.index.register_borrowed_subresource( ¤t_base_uri, draft, - false, + insert_resource, subschema, ); } + } else if scan.has_anchor && !is_root_entry { + state + .index + .register_borrowed_subresource(¤t_base_uri, draft, false, subschema); } - if let (Some(schema), Some(scan)) = (object, scan.as_ref()) { - if scan.has_ref_or_schema { - let child_start = state.borrowed_child_scratch.len(); - draft.scan_borrowed_object_into_scratch_map( - schema, - &mut state.borrowed_reference_scratch, - &mut state.borrowed_child_scratch, - ); - let child_end = state.borrowed_child_scratch.len(); - - let subschema_ptr = std::ptr::from_ref::(subschema) as usize; - if state.visited_schemas.insert(subschema_ptr) { - for (reference, key) in [ - (state.borrowed_reference_scratch.ref_, "$ref"), - (state.borrowed_reference_scratch.schema, "$schema"), - ] { - let Some(reference) = reference else { - continue; - }; - if reference.starts_with("https://json-schema.org/draft/") - || reference.starts_with("http://json-schema.org/draft-") - || current_base_uri - .as_str() - .starts_with("https://json-schema.org/draft/") - { - if key == "$ref" { - state.refers_metaschemas = true; - } - continue; - } - if reference == "#" { - continue; - } - if reference.starts_with('#') { - if mark_local_reference(local_seen, ¤t_base_uri, reference) { - let ptr = reference.trim_start_matches('#'); - if let Some(referenced) = pointer(document_root, ptr) { - let target_draft = draft.detect(referenced); - let value_addr = std::ptr::from_ref::(referenced) as usize; - state.deferred_refs.push(( - 
Arc::clone(¤t_base_uri), - Arc::clone(document_root_uri), - ptr.to_string(), - target_draft, - value_addr, - )); - } - } - continue; - } - if mark_reference(&mut state.seen, ¤t_base_uri, reference) { - let resolved = if current_base_uri.has_fragment() { - let mut base_without_fragment = current_base_uri.as_ref().clone(); - base_without_fragment.set_fragment(None); - - let (path, fragment) = match reference.split_once('#') { - Some((path, fragment)) => (path, Some(fragment)), - None => (reference, None), - }; - - let mut resolved = (*resolution_cache - .resolve_against(&base_without_fragment.borrow(), path)?) - .clone(); - if let Some(fragment) = fragment { - if let Some(encoded) = uri::EncodedString::new(fragment) { - resolved = resolved.with_fragment(Some(encoded)); - } else { - uri::encode_to(fragment, &mut state.scratch); - resolved = resolved.with_fragment(Some( - uri::EncodedString::new_or_panic(&state.scratch), - )); - state.scratch.clear(); - } - } - resolved - } else { - (*resolution_cache - .resolve_against(¤t_base_uri.borrow(), reference)?) 
- .clone() - }; - - let kind = if key == "$schema" { - ReferenceKind::Schema - } else { - ReferenceKind::Ref - }; - state - .external - .insert((reference.to_string(), resolved, kind)); - } - } - } + if scan.has_ref_or_schema { + let child_start = state.borrowed_child_scratch.len(); + draft.scan_borrowed_object_into_scratch_map( + object, + &mut state.reference_scratch, + &mut state.borrowed_child_scratch, + ); + let child_end = state.borrowed_child_scratch.len(); - let mut idx = child_start; - while idx < child_end { - let (child, child_draft) = state.borrowed_child_scratch[idx]; - idx += 1; - explore_borrowed_subtree( - Arc::clone(¤t_base_uri), - document_root, - child, - child_draft, - false, - document_root_uri, - state, - known_resources, - resolution_cache, - local_seen, - )?; - } + let subschema_ptr = std::ptr::from_ref::(subschema) as usize; + if state.visited_schemas.insert(subschema_ptr) { + collect_external_resources_from_slots( + ¤t_base_uri, + document_root, + &state.reference_scratch, + &mut state.external, + &mut state.seen, + resolution_cache, + &mut state.scratch, + &mut state.refers_metaschemas, + draft, + document_root_uri, + &mut state.deferred_refs, + local_seen, + )?; + } - state.borrowed_reference_scratch.ref_ = None; - state.borrowed_reference_scratch.schema = None; - state.borrowed_child_scratch.truncate(child_start); - return Ok(()); + let mut idx = child_start; + while idx < child_end { + let (child, child_draft) = state.borrowed_child_scratch[idx]; + idx += 1; + explore_borrowed_subtree( + Arc::clone(¤t_base_uri), + document_root, + child, + child_draft, + false, + document_root_uri, + state, + known_resources, + resolution_cache, + local_seen, + )?; } + + state.reference_scratch.ref_ = None; + state.reference_scratch.schema = None; + state.borrowed_child_scratch.truncate(child_start); + return Ok(()); } + let subschema_ptr = std::ptr::from_ref::(subschema) as usize; - if state.visited_schemas.insert(subschema_ptr) - && 
scan.as_ref().is_none_or(|scan| scan.has_ref_or_schema) - { + if state.visited_schemas.insert(subschema_ptr) { collect_external_resources( ¤t_base_uri, document_root, @@ -655,24 +511,20 @@ fn explore_borrowed_subtree<'r>( )?; } - if let Some(schema) = object { - draft.walk_borrowed_subresources_map(schema, &mut |child, child_draft| { - explore_borrowed_subtree( - Arc::clone(¤t_base_uri), - document_root, - child, - child_draft, - false, - document_root_uri, - state, - known_resources, - resolution_cache, - local_seen, - ) - }) - } else { - Ok(()) - } + draft.walk_borrowed_subresources_map(object, &mut |child, child_draft| { + explore_borrowed_subtree( + Arc::clone(¤t_base_uri), + document_root, + child, + child_draft, + false, + document_root_uri, + state, + known_resources, + resolution_cache, + local_seen, + ) + }) } fn process_owned_document<'a, 'r>( @@ -684,9 +536,8 @@ fn process_owned_document<'a, 'r>( state: &mut ProcessingState<'r>, known_resources: &mut KnownResources, resolution_cache: &mut UriCache, - owned_reference_scratch: &mut crate::spec::ReferenceSlots<'a>, - owned_child_scratch: &mut Vec>, - local_seen: &mut LocalSeen<'a>, + owned_child_scratch: &mut Vec>, + local_seen: &mut VisitedRefs<'a>, ) -> Result<(), Error> { let document_root = document.contents(); let Some(subschema) = (if pointer_path.is_empty() { @@ -711,7 +562,6 @@ fn process_owned_document<'a, 'r>( state, known_resources, resolution_cache, - owned_reference_scratch, owned_child_scratch, local_seen, ) @@ -757,27 +607,24 @@ fn explore_owned_subtree<'a, 'r>( state: &mut ProcessingState<'r>, known_resources: &mut KnownResources, resolution_cache: &mut UriCache, - owned_reference_scratch: &mut crate::spec::ReferenceSlots<'a>, - owned_child_scratch: &mut Vec>, - local_seen: &mut LocalSeen<'a>, + owned_child_scratch: &mut Vec>, + local_seen: &mut VisitedRefs<'a>, ) -> Result<(), Error> { - let object = subschema.as_object(); + let Some(object) = subschema.as_object() else { + return Ok(()); + 
}; let child_start = owned_child_scratch.len(); - let info = object.map(|schema| draft.object_info(schema)); - let (id, has_anchors) = info - .as_ref() - .map_or((None, false), |info| (info.id, info.has_anchor)); + let info = draft.object_info(object); + let (id, has_anchors) = (info.id, info.has_anchor); if let Some(id) = id { - let original_base_uri = Arc::clone(¤t_base_uri); - current_base_uri = resolve_id(¤t_base_uri, id, resolution_cache)?; - known_resources.insert((*current_base_uri).clone()); - let insert_resource = current_base_uri != original_base_uri; + let (new_base, insert_resource) = + resolve_subresource_id(¤t_base_uri, id, known_resources, resolution_cache)?; + current_base_uri = new_base; if !(is_root_entry && current_base_uri == *document_root_uri) && (insert_resource || has_anchors) { let pointer = ParsedPointer::from_pointer_node(path); - insert_owned_discovered_index_entries( - &mut state.index_data, + state.index.register_owned_subresource( ¤t_base_uri, document, &pointer, @@ -788,8 +635,7 @@ fn explore_owned_subtree<'a, 'r>( } } else if has_anchors && !is_root_entry { let pointer = ParsedPointer::from_pointer_node(path); - insert_owned_discovered_index_entries( - &mut state.index_data, + state.index.register_owned_subresource( ¤t_base_uri, document, &pointer, @@ -799,97 +645,50 @@ fn explore_owned_subtree<'a, 'r>( ); } - if let (Some(schema), Some(info)) = (object, info.as_ref()) { - if info.ref_.is_some() || info.schema.is_some() { - if schema.len() == 1 { - owned_reference_scratch.ref_ = info.ref_; - owned_reference_scratch.schema = info.schema; - - let subschema_ptr = std::ptr::from_ref::(subschema) as usize; - if state.visited_schemas.insert(subschema_ptr) { - collect_external_resources_from_slots( - ¤t_base_uri, - document_root, - owned_reference_scratch, - &mut state.external, - &mut state.seen, - resolution_cache, - &mut state.scratch, - &mut state.refers_metaschemas, - draft, - document_root_uri, - &mut state.deferred_refs, - 
local_seen, - )?; - } - owned_reference_scratch.ref_ = None; - owned_reference_scratch.schema = None; - return Ok(()); - } - - if info.has_children { - let (_, _) = draft.scan_owned_object_into_scratch_map( - schema, - owned_reference_scratch, - owned_child_scratch, - ); - let subschema_ptr = std::ptr::from_ref::(subschema) as usize; - if state.visited_schemas.insert(subschema_ptr) { - collect_external_resources_from_slots( - ¤t_base_uri, - document_root, - owned_reference_scratch, - &mut state.external, - &mut state.seen, - resolution_cache, - &mut state.scratch, - &mut state.refers_metaschemas, - draft, - document_root_uri, - &mut state.deferred_refs, - local_seen, - )?; - } - - let child_end = owned_child_scratch.len(); - let mut idx = child_start; - while idx < child_end { - let child = owned_child_scratch[idx]; - idx += 1; - with_owned_child_path(path, &child, |child_path| { - explore_owned_subtree( - Arc::clone(¤t_base_uri), - document_root, - child.value, - child.draft, - false, - child_path, - document_root_uri, - document, - state, - known_resources, - resolution_cache, - owned_reference_scratch, - owned_child_scratch, - local_seen, - ) - })?; - } + if info.ref_.is_some() || info.schema.is_some() { + // Build stack-local slots from already-computed info; avoids a mutable scratch + // parameter while keeping zero allocations. 
+ let slots = ReferenceSlots { + ref_: info.ref_, + schema: info.schema, + }; - owned_reference_scratch.ref_ = None; - owned_reference_scratch.schema = None; - owned_child_scratch.truncate(child_start); - return Ok(()); + if object.len() == 1 { + let subschema_ptr = std::ptr::from_ref::(subschema) as usize; + if state.visited_schemas.insert(subschema_ptr) { + collect_external_resources_from_slots( + ¤t_base_uri, + document_root, + &slots, + &mut state.external, + &mut state.seen, + resolution_cache, + &mut state.scratch, + &mut state.refers_metaschemas, + draft, + document_root_uri, + &mut state.deferred_refs, + local_seen, + )?; } + return Ok(()); + } - owned_reference_scratch.ref_ = info.ref_; - owned_reference_scratch.schema = info.schema; + if info.has_children { + // Use a temporary slot just to satisfy the API; children are written into + // owned_child_scratch which is what we actually need. + let mut scan_slots = ReferenceSlots::default(); + let (_, _) = draft.scan_owned_object_into_scratch_map( + object, + &mut scan_slots, + owned_child_scratch, + ); let subschema_ptr = std::ptr::from_ref::(subschema) as usize; if state.visited_schemas.insert(subschema_ptr) { collect_external_resources_from_slots( ¤t_base_uri, document_root, - owned_reference_scratch, + &slots, &mut state.external, &mut state.seen, resolution_cache, @@ -901,34 +700,72 @@ fn explore_owned_subtree<'a, 'r>( local_seen, )?; } - owned_reference_scratch.ref_ = None; - owned_reference_scratch.schema = None; + + let child_end = owned_child_scratch.len(); + let mut idx = child_start; + while idx < child_end { + let child = owned_child_scratch[idx]; + idx += 1; + with_owned_child_path(path, &child, |child_path| { + explore_owned_subtree( + Arc::clone(¤t_base_uri), + document_root, + child.value, + child.draft, + false, + child_path, + document_root_uri, + document, + state, + known_resources, + resolution_cache, + owned_child_scratch, + local_seen, + ) + })?; + } + + 
owned_child_scratch.truncate(child_start); return Ok(()); } - } - if let Some(schema) = object { - draft.walk_owned_subresources_map(schema, path, &mut |child_path, child, child_draft| { - explore_owned_subtree( - Arc::clone(¤t_base_uri), + let subschema_ptr = std::ptr::from_ref::(subschema) as usize; + if state.visited_schemas.insert(subschema_ptr) { + collect_external_resources_from_slots( + ¤t_base_uri, document_root, - child, - child_draft, - false, - child_path, - document_root_uri, - document, - state, - known_resources, + &slots, + &mut state.external, + &mut state.seen, resolution_cache, - owned_reference_scratch, - owned_child_scratch, + &mut state.scratch, + &mut state.refers_metaschemas, + draft, + document_root_uri, + &mut state.deferred_refs, local_seen, - ) - }) - } else { - Ok(()) + )?; + } + return Ok(()); } + + draft.walk_owned_subresources_map(object, path, &mut |child_path, child, child_draft| { + explore_owned_subtree( + Arc::clone(¤t_base_uri), + document_root, + child, + child_draft, + false, + child_path, + document_root_uri, + document, + state, + known_resources, + resolution_cache, + owned_child_scratch, + local_seen, + ) + }) } fn enqueue_fragment_entry( @@ -977,8 +814,7 @@ fn handle_metaschemas<'a>( Arc::new(StoredDocument::borrowed(schema, draft)), ); known_resources.insert((*key).clone()); - insert_root_index_entries( - &mut state.index_data, + state.index.register_document( &key, documents .get(&key) @@ -997,7 +833,7 @@ fn create_resource<'a>( default_draft: Draft, documents: &mut DocumentStore<'a>, known_resources: &mut KnownResources, - index_data: &mut PreparedIndex<'a>, + index: &mut Index<'a>, custom_metaschemas: &mut Vec, ) -> (Arc>, Draft) { let draft = default_draft.detect(&retrieved); @@ -1012,8 +848,7 @@ fn create_resource<'a>( .expect("document was just inserted") .contents(); known_resources.insert((*key).clone()); - insert_root_index_entries( - index_data, + index.register_document( &key, documents .get(&key) @@ -1033,64 
+868,134 @@ fn create_resource<'a>( (key, draft) } -/// Shared sync processing loop used during registry preparation. After the -/// initial input has been ingested into `state`, this function drives the -/// BFS-fetch cycle until all reachable external resources have been retrieved, -/// then handles meta-schema injection and runs a final queue pass. +/// Drain the BFS queue and process all deferred local refs, reusing `local_seen_buf` +/// across iterations to avoid repeated allocation. #[allow(unsafe_code)] -fn run_sync_processing_loop<'a>( +fn drain_queue_and_deferred<'a>( state: &mut ProcessingState<'a>, - documents: &mut DocumentStore<'a>, + documents: &DocumentStore<'a>, known_resources: &mut KnownResources, resolution_cache: &mut UriCache, + visited: &mut VisitedRefs<'static>, +) -> Result<(), Error> { + // SAFETY: widens 'static → '_ (covariant); set is empty after reuse_local_seen clears it. + let mut local_visited: VisitedRefs<'_> = + unsafe { reuse_visited_local_refs(std::mem::take(visited)) }; + process_queue(state, documents, known_resources, resolution_cache)?; + process_deferred_refs(state, documents, resolution_cache, &mut local_visited)?; + // SAFETY: clears all '_ refs before narrowing back to 'static to reclaim the buffer. + *visited = unsafe { reuse_visited_local_refs(local_visited) }; + Ok(()) +} + +/// Fetch all pending external resources synchronously and enqueue the results. 
+fn fetch_external_resources<'a>( + state: &mut ProcessingState<'a>, + documents: &mut DocumentStore<'a>, + known_resources: &mut KnownResources, default_draft: Draft, retriever: &dyn Retrieve, ) -> Result<(), Error> { - let mut local_seen_buf: LocalSeen<'static> = LocalSeen::new(); - - loop { - if state.queue.is_empty() && state.external.is_empty() { - break; + for (original, uri, kind) in state.external.drain() { + let mut fragmentless = uri.clone(); + fragmentless.set_fragment(None); + if !known_resources.contains(&fragmentless) { + let retrieved = match retriever.retrieve(&fragmentless) { + Ok(retrieved) => retrieved, + Err(error) => { + handle_retrieve_error(&uri, &original, &fragmentless, error, kind)?; + continue; + } + }; + let (key, draft) = create_resource( + retrieved, + fragmentless, + default_draft, + documents, + known_resources, + &mut state.index, + &mut state.custom_metaschemas, + ); + enqueue_fragment_entry(&uri, &key, default_draft, documents, &mut state.queue); + state + .queue + .push_back((Arc::clone(&key), key, String::new(), draft)); } + } + Ok(()) +} - { - // SAFETY: widens 'static → '_ (covariant); set is empty after reuse_local_seen clears it. - let mut local_seen: LocalSeen<'_> = unsafe { reuse_local_seen(local_seen_buf) }; - process_queue(state, documents, known_resources, resolution_cache)?; - process_deferred_refs(state, documents, resolution_cache, &mut local_seen)?; - // SAFETY: clears all '_ refs before narrowing back to 'static to reclaim the buffer. - local_seen_buf = unsafe { reuse_local_seen(local_seen) }; +/// Fetch all pending external resources concurrently and enqueue the results. +/// Groups requests by base URI and issues them in a single `join_all` batch. 
+#[cfg(feature = "retrieve-async")] +async fn fetch_external_resources_async<'a>( + state: &mut ProcessingState<'a>, + documents: &mut DocumentStore<'a>, + known_resources: &mut KnownResources, + default_draft: Draft, + retriever: &dyn crate::AsyncRetrieve, +) -> Result<(), Error> { + type ExternalRefsByBase = AHashMap, Vec<(String, Uri, ReferenceKind)>>; + + if state.external.is_empty() { + return Ok(()); + } + + let mut grouped = ExternalRefsByBase::new(); + for (original, uri, kind) in state.external.drain() { + let mut fragmentless = uri.clone(); + fragmentless.set_fragment(None); + if !known_resources.contains(&fragmentless) { + grouped + .entry(fragmentless) + .or_default() + .push((original, uri, kind)); } + } - for (original, uri, kind) in state.external.drain() { - let mut fragmentless = uri.clone(); - fragmentless.set_fragment(None); - if !known_resources.contains(&fragmentless) { - let retrieved = match retriever.retrieve(&fragmentless) { - Ok(retrieved) => retrieved, - Err(error) => { - handle_retrieve_error(&uri, &original, &fragmentless, error, kind)?; - continue; - } - }; + // Use grouped.keys() for futures (borrows) then grouped.into_iter() for results (consumes). + // The map is not mutated between the two iterations, so zip order is stable. 
+ let results = futures::future::join_all(grouped.keys().map(|u| retriever.retrieve(u))).await; - let (key, draft) = create_resource( - retrieved, - fragmentless, - default_draft, - documents, - known_resources, - &mut state.index_data, - &mut state.custom_metaschemas, - ); - enqueue_fragment_entry(&uri, &key, default_draft, documents, &mut state.queue); - state - .queue - .push_back((Arc::clone(&key), key, String::new(), draft)); + for ((fragmentless, refs), result) in grouped.into_iter().zip(results) { + let retrieved = match result { + Ok(retrieved) => retrieved, + Err(error) => { + if let Some((original, uri, kind)) = refs.into_iter().next() { + handle_retrieve_error(&uri, &original, &fragmentless, error, kind)?; + } + continue; } + }; + let (key, draft) = create_resource( + retrieved, + fragmentless, + default_draft, + documents, + known_resources, + &mut state.index, + &mut state.custom_metaschemas, + ); + for (_, uri, _) in &refs { + enqueue_fragment_entry(uri, &key, default_draft, documents, &mut state.queue); } + state + .queue + .push_back((Arc::clone(&key), key, String::new(), draft)); } + Ok(()) +} +/// Inject meta-schemas if referenced, then run a final queue pass to index them. +#[allow(unsafe_code)] +fn finalize_index<'a>( + state: &mut ProcessingState<'a>, + documents: &mut DocumentStore<'a>, + known_resources: &mut KnownResources, + resolution_cache: &mut UriCache, + default_draft: Draft, + visited: VisitedRefs<'static>, +) -> Result<(), Error> { handle_metaschemas( state.refers_metaschemas, documents, @@ -1101,76 +1006,54 @@ fn run_sync_processing_loop<'a>( if !state.queue.is_empty() { // SAFETY: widens 'static → '_ (covariant); set is empty after reuse_local_seen clears it. 
- let mut local_seen: LocalSeen<'_> = unsafe { reuse_local_seen(local_seen_buf) }; + let mut local_visited: VisitedRefs<'_> = unsafe { reuse_visited_local_refs(visited) }; process_queue(state, documents, known_resources, resolution_cache)?; - process_deferred_refs(state, documents, resolution_cache, &mut local_seen)?; + process_deferred_refs(state, documents, resolution_cache, &mut local_visited)?; } Ok(()) } -pub(super) fn process_resources_mixed<'a>( - pairs: impl IntoIterator, PendingResource<'a>)>, - retriever: &dyn Retrieve, +/// Shared sync processing loop used during registry preparation. After the +/// initial input has been ingested into `state`, this function drives the +/// BFS-fetch cycle until all reachable external resources have been retrieved, +/// then handles meta-schema injection and runs a final queue pass. +fn resolve_and_index<'a>( + state: &mut ProcessingState<'a>, documents: &mut DocumentStore<'a>, known_resources: &mut KnownResources, resolution_cache: &mut UriCache, - draft_override: Option, -) -> Result<(Vec, PreparedIndex<'a>), Error> { - let mut state = ProcessingState::new(); - process_input_resources_mixed( - pairs, - documents, - known_resources, - &mut state, - draft_override, - ); - run_sync_processing_loop( - &mut state, - documents, - known_resources, - resolution_cache, - draft_override.unwrap_or_default(), - retriever, - )?; - Ok((state.custom_metaschemas, state.index_data)) -} + default_draft: Draft, + retriever: &dyn Retrieve, +) -> Result<(), Error> { + let mut visited: VisitedRefs<'static> = VisitedRefs::new(); -#[cfg(feature = "retrieve-async")] -pub(super) async fn process_resources_async_mixed<'a>( - pairs: impl IntoIterator, PendingResource<'a>)>, - retriever: &dyn crate::AsyncRetrieve, - documents: &mut DocumentStore<'a>, - known_resources: &mut KnownResources, - resolution_cache: &mut UriCache, - draft_override: Option, -) -> Result<(Vec, PreparedIndex<'a>), Error> { - let mut state = ProcessingState::new(); - 
process_input_resources_mixed( - pairs, - documents, - known_resources, - &mut state, - draft_override, - ); - run_async_processing_loop( - &mut state, + while !(state.queue.is_empty() && state.external.is_empty()) { + drain_queue_and_deferred( + state, + documents, + known_resources, + resolution_cache, + &mut visited, + )?; + fetch_external_resources(state, documents, known_resources, default_draft, retriever)?; + } + + finalize_index( + state, documents, known_resources, resolution_cache, - draft_override.unwrap_or_default(), - retriever, + default_draft, + visited, ) - .await?; - Ok((state.custom_metaschemas, state.index_data)) } /// Shared async processing loop used during registry preparation. Batches /// concurrent external retrievals with `join_all` and otherwise mirrors -/// [`run_sync_processing_loop`]. +/// [`resolve_and_index`]. #[cfg(feature = "retrieve-async")] -#[allow(unsafe_code)] -async fn run_async_processing_loop<'a>( +async fn resolve_and_index_async<'a>( state: &mut ProcessingState<'a>, documents: &mut DocumentStore<'a>, known_resources: &mut KnownResources, @@ -1178,93 +1061,28 @@ async fn run_async_processing_loop<'a>( default_draft: Draft, retriever: &dyn crate::AsyncRetrieve, ) -> Result<(), Error> { - type ExternalRefsByBase = AHashMap, Vec<(String, Uri, ReferenceKind)>>; - - let mut local_seen_buf: LocalSeen<'static> = LocalSeen::new(); - - loop { - if state.queue.is_empty() && state.external.is_empty() { - break; - } - - { - // SAFETY: widens 'static → '_ (covariant); set is empty after reuse_local_seen clears it. - let mut local_seen: LocalSeen<'_> = unsafe { reuse_local_seen(local_seen_buf) }; - process_queue(state, documents, known_resources, resolution_cache)?; - process_deferred_refs(state, documents, resolution_cache, &mut local_seen)?; - // SAFETY: clears all '_ refs before narrowing back to 'static to reclaim the buffer. 
- local_seen_buf = unsafe { reuse_local_seen(local_seen) }; - } - - if !state.external.is_empty() { - let mut grouped = ExternalRefsByBase::new(); - for (original, uri, kind) in state.external.drain() { - let mut fragmentless = uri.clone(); - fragmentless.set_fragment(None); - if !known_resources.contains(&fragmentless) { - grouped - .entry(fragmentless) - .or_default() - .push((original, uri, kind)); - } - } - - let entries: Vec<_> = grouped.into_iter().collect(); - let results = { - let futures = entries - .iter() - .map(|(fragmentless, _)| retriever.retrieve(fragmentless)); - futures::future::join_all(futures).await - }; - - for ((fragmentless, refs), result) in entries.into_iter().zip(results) { - let retrieved = match result { - Ok(retrieved) => retrieved, - Err(error) => { - if let Some((original, uri, kind)) = refs.into_iter().next() { - handle_retrieve_error(&uri, &original, &fragmentless, error, kind)?; - } - continue; - } - }; + let mut visited: VisitedRefs<'static> = VisitedRefs::new(); - let (key, draft) = create_resource( - retrieved, - fragmentless, - default_draft, - documents, - known_resources, - &mut state.index_data, - &mut state.custom_metaschemas, - ); - - for (_, uri, _) in &refs { - enqueue_fragment_entry(uri, &key, default_draft, documents, &mut state.queue); - } - - state - .queue - .push_back((Arc::clone(&key), key, String::new(), draft)); - } - } + while !(state.queue.is_empty() && state.external.is_empty()) { + drain_queue_and_deferred( + state, + documents, + known_resources, + resolution_cache, + &mut visited, + )?; + fetch_external_resources_async(state, documents, known_resources, default_draft, retriever) + .await?; } - handle_metaschemas( - state.refers_metaschemas, + finalize_index( + state, documents, known_resources, + resolution_cache, default_draft, - state, - )?; - - if !state.queue.is_empty() { - // SAFETY: widens 'static → '_ (covariant); set is empty after reuse_local_seen clears it. 
- let mut local_seen: LocalSeen<'_> = unsafe { reuse_local_seen(local_seen_buf) }; - process_queue(state, documents, known_resources, resolution_cache)?; - process_deferred_refs(state, documents, resolution_cache, &mut local_seen)?; - } - - Ok(()) + visited, + ) } fn handle_retrieve_error( @@ -1293,11 +1111,9 @@ fn handle_retrieve_error( fn with_owned_child_path( path: &JsonPointerNode<'_, '_>, - child: &crate::spec::ChildNode<'_>, + child: &ChildNode<'_>, f: impl FnOnce(&JsonPointerNode<'_, '_>) -> R, ) -> R { - use crate::spec::PathSegment; - let first = match child.first { PathSegment::Key(key) => path.push(key), PathSegment::Index(index) => path.push(index), @@ -1318,7 +1134,7 @@ fn with_owned_child_path( fn collect_external_resources_from_slots<'doc>( base: &Arc>, root: &'doc Value, - references: &crate::spec::ReferenceSlots<'doc>, + references: &ReferenceSlots<'doc>, collected: &mut AHashSet<(String, Uri, ReferenceKind)>, seen: &mut ReferenceTracker, resolution_cache: &mut UriCache, @@ -1327,7 +1143,7 @@ fn collect_external_resources_from_slots<'doc>( draft: Draft, doc_key: &Arc>, deferred_refs: &mut Vec, - local_seen: &mut LocalSeen<'doc>, + visited: &mut VisitedRefs<'doc>, ) -> Result<(), Error> { for (reference, key) in [(references.ref_, "$ref"), (references.schema, "$schema")] { let Some(reference) = reference else { @@ -1346,7 +1162,7 @@ fn collect_external_resources_from_slots<'doc>( continue; } if reference.starts_with('#') { - if mark_local_reference(local_seen, base, reference) { + if mark_visited_local_ref(visited, base, reference) { let ptr = reference.trim_start_matches('#'); if let Some(referenced) = pointer(root, ptr) { let target_draft = draft.detect(referenced); @@ -1433,7 +1249,7 @@ fn collect_external_resources<'doc>( draft: Draft, doc_key: &Arc>, deferred_refs: &mut Vec, - local_seen: &mut LocalSeen<'doc>, + visited: &mut VisitedRefs<'doc>, ) -> Result<(), Error> { if base.scheme().as_str() == "urn" { return Ok(()); @@ -1450,7 +1266,7 @@ 
fn collect_external_resources<'doc>( } } else if $reference != "#" { if $reference.starts_with('#') { - if mark_local_reference(local_seen, base, $reference) { + if mark_visited_local_ref(visited, base, $reference) { let ptr = $reference.trim_start_matches('#'); if let Some(referenced) = pointer(root, ptr) { let target_draft = draft.detect(referenced); @@ -1541,7 +1357,7 @@ fn collect_external_resources_recursive<'doc>( visited: &mut AHashSet, doc_key: &Arc>, deferred_refs: &mut Vec, - local_seen: &mut LocalSeen<'doc>, + visited_refs: &mut VisitedRefs<'doc>, ) -> Result<(), Error> { let ptr = std::ptr::from_ref::(contents) as usize; if !visited.insert(ptr) { @@ -1565,7 +1381,7 @@ fn collect_external_resources_recursive<'doc>( draft, doc_key, deferred_refs, - local_seen, + visited_refs, )?; for subresource in draft.subresources_of(contents) { @@ -1583,7 +1399,7 @@ fn collect_external_resources_recursive<'doc>( visited, doc_key, deferred_refs, - local_seen, + visited_refs, )?; } Ok(()) @@ -1601,7 +1417,7 @@ fn process_deferred_refs<'a>( state: &mut ProcessingState<'_>, documents: &'a DocumentStore<'a>, resolution_cache: &mut UriCache, - local_seen: &mut LocalSeen<'a>, + local_seen: &mut VisitedRefs<'a>, ) -> Result<(), Error> { while !state.deferred_refs.is_empty() { let batch = std::mem::take(&mut state.deferred_refs); @@ -1648,8 +1464,8 @@ fn mark_reference(seen: &mut ReferenceTracker, base: &Arc>, referenc seen.insert(ReferenceKey::new(base, reference)) } -fn mark_local_reference<'a>( - local_seen: &mut LocalSeen<'a>, +fn mark_visited_local_ref<'a>( + local_seen: &mut VisitedRefs<'a>, base: &Arc>, reference: &'a str, ) -> bool { @@ -1672,6 +1488,21 @@ fn resolve_id( } Ok(Arc::new(resolved)) } + +/// Resolves `$id` against the current base, updates `known_resources`, and returns +/// `(new_base, changed)` where `changed` is true when the URI actually changed. 
+fn resolve_subresource_id( + current_base_uri: &Arc>, + id: &str, + known_resources: &mut KnownResources, + resolution_cache: &mut UriCache, +) -> Result<(Arc>, bool), Error> { + let new_base = resolve_id(current_base_uri, id, resolution_cache)?; + let changed = new_base != *current_base_uri; + known_resources.insert((*new_base).clone()); + Ok((new_base, changed)) +} + #[cfg(test)] mod tests { use std::{error::Error as _, sync::Arc}; @@ -1681,13 +1512,13 @@ mod tests { use serde_json::{json, Value}; use test_case::test_case; - use crate::{uri::from_str, Anchor, Draft, Registry, Resource, Retrieve}; - - use super::{ - insert_root_index_entries, process_borrowed_document, process_owned_document, - IndexedResource, KnownResources, LocalSeen, ProcessingState, StoredDocument, + use super::*; + use crate::{ + cache::UriCache, + registry::{index::IndexedResource, SPECIFICATIONS}, + uri::from_str, + Anchor, Draft, Registry, Resource, Retrieve, }; - use crate::{cache::UriCache, registry::SPECIFICATIONS}; #[test] fn test_invalid_uri_on_registry_creation() { @@ -1834,15 +1665,17 @@ mod tests { let mut state = ProcessingState::new(); let mut known_resources = KnownResources::default(); let mut resolution_cache = UriCache::new(); - let mut local_seen = LocalSeen::new(); + let mut local_seen = VisitedRefs::new(); known_resources.insert((*doc_key).clone()); - insert_root_index_entries(&mut state.index_data, &doc_key, &document); + state.index.register_document(&doc_key, &document); process_borrowed_document( Arc::clone(&doc_key), &doc_key, - &document, + document + .borrowed_contents() + .expect("test document is borrowed"), "", Draft::Draft202012, &mut state, @@ -1854,7 +1687,7 @@ mod tests { let embedded_uri = Arc::new(from_str("http://example.com/embedded").expect("valid embedded URI")); - match state.index_data.resources.get(&embedded_uri) { + match state.index.resources.get(&embedded_uri) { Some(IndexedResource::Borrowed(resource)) => { assert_eq!( resource.contents(), @@ 
-1880,12 +1713,11 @@ mod tests { let mut state = ProcessingState::new(); let mut known_resources = KnownResources::default(); let mut resolution_cache = UriCache::new(); - let mut owned_reference_scratch = crate::spec::ReferenceSlots::default(); - let mut owned_child_scratch: Vec> = Vec::new(); - let mut local_seen = LocalSeen::new(); + let mut local_seen = VisitedRefs::new(); + let mut owned_child_scratch = Vec::new(); known_resources.insert((*doc_key).clone()); - insert_root_index_entries(&mut state.index_data, &doc_key, &document); + state.index.register_document(&doc_key, &document); process_owned_document( Arc::clone(&doc_key), @@ -1896,7 +1728,6 @@ mod tests { &mut state, &mut known_resources, &mut resolution_cache, - &mut owned_reference_scratch, &mut owned_child_scratch, &mut local_seen, ) @@ -1904,7 +1735,7 @@ mod tests { let embedded_uri = Arc::new(from_str("http://example.com/embedded").expect("valid embedded URI")); - match state.index_data.resources.get(&embedded_uri) { + match state.index.resources.get(&embedded_uri) { Some(IndexedResource::Owned { .. 
}) => {} other => panic!("expected owned embedded resource entry, got {other:?}"), } @@ -1925,12 +1756,11 @@ mod tests { let mut state = ProcessingState::new(); let mut known_resources = KnownResources::default(); let mut resolution_cache = UriCache::new(); - let mut owned_reference_scratch = crate::spec::ReferenceSlots::default(); - let mut owned_child_scratch: Vec> = Vec::new(); - let mut local_seen = LocalSeen::new(); + let mut local_seen = VisitedRefs::new(); + let mut owned_child_scratch = Vec::new(); known_resources.insert((*doc_key).clone()); - insert_root_index_entries(&mut state.index_data, &doc_key, &document); + state.index.register_document(&doc_key, &document); process_owned_document( Arc::clone(&doc_key), @@ -1941,7 +1771,6 @@ mod tests { &mut state, &mut known_resources, &mut resolution_cache, - &mut owned_reference_scratch, &mut owned_child_scratch, &mut local_seen, ) @@ -1949,7 +1778,7 @@ mod tests { let embedded_uri = Arc::new(from_str("http://example.com/embedded").expect("valid embedded URI")); - match state.index_data.resources.get(&embedded_uri) { + match state.index.resources.get(&embedded_uri) { Some(IndexedResource::Owned { pointer, .. }) => { assert_eq!( pointer.lookup(document.contents()), diff --git a/crates/jsonschema-referencing/src/registry/index.rs b/crates/jsonschema-referencing/src/registry/index.rs index 4abacdfb..afda8a6d 100644 --- a/crates/jsonschema-referencing/src/registry/index.rs +++ b/crates/jsonschema-referencing/src/registry/index.rs @@ -11,19 +11,19 @@ use std::sync::Arc; use fluent_uri::Uri; +use serde_json::Value; + use crate::{ anchor::Anchor, draft::Draft, pointer::ParsedPointer, small_map::SmallMap, ResourceRef, }; use super::build::StoredDocument; -pub(super) type AnchorName = Box; - /// Lookup tables mapping canonical URIs to resources and anchors. 
#[derive(Debug, Clone, Default)] -pub(super) struct PreparedIndex<'a> { +pub(super) struct Index<'a> { pub(super) resources: SmallMap>, IndexedResource<'a>>, - pub(super) anchors: SmallMap>, SmallMap>>, + pub(super) anchors: SmallMap>, SmallMap, IndexedAnchor<'a>>>, } /// A schema resource in the index: either borrowed from the caller or owned by the registry. @@ -64,7 +64,7 @@ pub(super) enum IndexedAnchor<'a> { document: Arc>, pointer: ParsedPointer, draft: Draft, - kind: IndexedAnchorKind, + kind: AnchorKind, name: Box, }, } @@ -93,8 +93,8 @@ impl IndexedAnchor<'_> { let contents = pointer.lookup(document.contents())?; let resource = ResourceRef::new(contents, *draft); Some(match kind { - IndexedAnchorKind::Default => Anchor::Default { name, resource }, - IndexedAnchorKind::Dynamic => Anchor::Dynamic { name, resource }, + AnchorKind::Default => Anchor::Default { name, resource }, + AnchorKind::Dynamic => Anchor::Dynamic { name, resource }, }) } } @@ -103,7 +103,95 @@ impl IndexedAnchor<'_> { /// Whether an anchor is a plain anchor (`$anchor`) or a dynamic anchor (`$dynamicAnchor`). #[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub(super) enum IndexedAnchorKind { +pub(super) enum AnchorKind { Default, Dynamic, } + +impl<'a> Index<'a> { + /// Register a document: insert its resource entry and all its anchors. + pub(super) fn register_document( + &mut self, + key: &Arc>, + document: &Arc>, + ) { + if let Some(contents) = document.borrowed_contents() { + self.register_borrowed_subresource(key, document.draft(), true, contents); + } else { + let pointer = ParsedPointer::default(); + self.register_owned_subresource( + key, + document, + &pointer, + document.draft(), + true, + document.contents(), + ); + } + } + + /// Register a subresource discovered during BFS traversal of a borrowed document. + /// If `has_id` is true, the subresource is also registered as a resource entry. 
+ pub(super) fn register_borrowed_subresource( + &mut self, + key: &Arc>, + draft: Draft, + has_id: bool, + contents: &'a Value, + ) { + if has_id { + self.resources.insert( + Arc::clone(key), + IndexedResource::Borrowed(ResourceRef::new(contents, draft)), + ); + } + let anchors = self.anchors.get_or_insert_default(Arc::clone(key)); + for anchor in draft.anchors(contents) { + anchors.insert( + anchor.name().to_string().into_boxed_str(), + IndexedAnchor::Borrowed(anchor), + ); + } + } + + /// Register a subresource discovered during BFS traversal of an owned document. + /// If `has_id` is true, the subresource is also registered as a resource entry. + pub(super) fn register_owned_subresource( + &mut self, + key: &Arc>, + document: &Arc>, + pointer: &ParsedPointer, + draft: Draft, + has_id: bool, + contents: &Value, + ) { + if has_id { + self.resources.insert( + Arc::clone(key), + IndexedResource::Owned { + document: Arc::clone(document), + pointer: pointer.clone(), + draft, + }, + ); + } + let anchors = self.anchors.get_or_insert_default(Arc::clone(key)); + for anchor in draft.anchors(contents) { + let (name, kind) = match anchor { + Anchor::Default { name, .. } => (name, AnchorKind::Default), + Anchor::Dynamic { name, .. } => (name, AnchorKind::Dynamic), + }; + let name = name.to_string().into_boxed_str(); + anchors.insert( + name.clone(), + IndexedAnchor::Owned { + document: Arc::clone(document), + pointer: pointer.clone(), + draft, + kind, + name, + }, + ); + } + } +} diff --git a/crates/jsonschema-referencing/src/registry/input.rs b/crates/jsonschema-referencing/src/registry/input.rs index 904630b4..02884e7b 100644 --- a/crates/jsonschema-referencing/src/registry/input.rs +++ b/crates/jsonschema-referencing/src/registry/input.rs @@ -18,10 +18,10 @@ use crate::{Resource, ResourceRef, Retrieve}; /// A resource waiting to enter the registry. 
#[derive(Clone)] pub(crate) enum PendingResource<'a> { - OwnedValue(Value), - BorrowedValue(&'a Value), - OwnedResource(Resource), - BorrowedResource(ResourceRef<'a>), + Value(Value), + ValueRef(&'a Value), + Resource(Resource), + ResourceRef(ResourceRef<'a>), } pub(crate) mod private { @@ -50,7 +50,7 @@ impl<'a> private::Sealed<'a> for Resource { pending: &mut AHashMap, PendingResource<'a>>, uri: Uri, ) { - pending.insert(uri, PendingResource::OwnedResource(self)); + pending.insert(uri, PendingResource::Resource(self)); } } @@ -62,7 +62,7 @@ impl<'a> private::Sealed<'a> for &'a Resource { ) { pending.insert( uri, - PendingResource::BorrowedResource(ResourceRef::new(self.contents(), self.draft())), + PendingResource::ResourceRef(ResourceRef::new(self.contents(), self.draft())), ); } } @@ -73,7 +73,7 @@ impl<'a> private::Sealed<'a> for &'a Value { pending: &mut AHashMap, PendingResource<'a>>, uri: Uri, ) { - pending.insert(uri, PendingResource::BorrowedValue(self)); + pending.insert(uri, PendingResource::ValueRef(self)); } } @@ -83,7 +83,7 @@ impl<'a> private::Sealed<'a> for ResourceRef<'a> { pending: &mut AHashMap, PendingResource<'a>>, uri: Uri, ) { - pending.insert(uri, PendingResource::BorrowedResource(self)); + pending.insert(uri, PendingResource::ResourceRef(self)); } } @@ -93,7 +93,7 @@ impl<'a> private::Sealed<'a> for Value { pending: &mut AHashMap, PendingResource<'a>>, uri: Uri, ) { - pending.insert(uri, PendingResource::OwnedValue(self)); + pending.insert(uri, PendingResource::Value(self)); } } diff --git a/crates/jsonschema-referencing/src/registry/mod.rs b/crates/jsonschema-referencing/src/registry/mod.rs index be0f7468..008f0e86 100644 --- a/crates/jsonschema-referencing/src/registry/mod.rs +++ b/crates/jsonschema-referencing/src/registry/mod.rs @@ -9,21 +9,16 @@ use serde_json::Value; use crate::{ cache::{SharedUriCache, UriCache}, - uri, + meta, uri, vocabularies::{self, VocabularySet}, Anchor, DefaultRetriever, Draft, Error, Resolver, ResourceRef, 
Retrieve, }; mod build; -#[cfg(feature = "retrieve-async")] -use build::process_resources_async_mixed; -use build::{ - build_prepared_index_for_documents, process_resources_mixed, validate_custom_metaschemas, - DocumentStore, KnownResources, StoredDocument, -}; +use build::{DocumentStore, KnownResources, StoredDocument}; mod index; -use index::{IndexedAnchor, IndexedResource, PreparedIndex}; +use index::{Index, IndexedAnchor, IndexedResource}; mod input; #[cfg(feature = "retrieve-async")] @@ -33,7 +28,7 @@ pub(crate) use input::{IntoRetriever, PendingResource}; /// Pre-loaded registry containing all JSON Schema meta-schemas and their vocabularies pub static SPECIFICATIONS: LazyLock> = - LazyLock::new(|| Registry::build_from_meta_schemas(crate::meta::META_SCHEMAS_ALL.as_slice())); + LazyLock::new(|| Registry::from_meta_schemas(meta::META_SCHEMAS_ALL.as_slice())); #[derive(Clone)] pub struct RegistryBuilder<'a> { @@ -86,7 +81,7 @@ pub struct Registry<'a> { baseline: Option<&'a Registry<'a>>, resolution_cache: SharedUriCache, known_resources: KnownResources, - index_data: PreparedIndex<'a>, + index: Index<'a>, } impl<'a> RegistryBuilder<'a> { @@ -190,15 +185,29 @@ impl<'a> RegistryBuilder<'a> { /// /// Returns an error if URI processing, retrieval, or custom meta-schema validation fails. pub fn prepare(self) -> Result, Error> { - if let Some(baseline) = self.baseline { - baseline.try_with_pending_resources_and_retriever( - self.pending, - &*self.retriever, - self.draft, - ) - } else { - Registry::try_from_pending_resources_impl(self.pending, &*self.retriever, self.draft) - } + // When extending an existing registry, seed known resources from the baseline so the + // retriever skips URIs already owned by the parent. 
+ let mut known_resources = self + .baseline + .map(|b| b.known_resources.clone()) + .unwrap_or_default(); + let mut documents = DocumentStore::new(); + let mut resolution_cache = UriCache::new(); + let (custom_metaschemas, index_data) = build::index_resources( + self.pending, + &*self.retriever, + &mut documents, + &mut known_resources, + &mut resolution_cache, + self.draft, + )?; + build::validate_custom_metaschemas(&custom_metaschemas, &known_resources)?; + Ok(Registry { + baseline: self.baseline, + resolution_cache: resolution_cache.into_shared(), + known_resources, + index: index_data, + }) } #[cfg(feature = "retrieve-async")] @@ -211,22 +220,37 @@ impl<'a> RegistryBuilder<'a> { let retriever = self .async_retriever .unwrap_or_else(|| Arc::new(DefaultRetriever)); - if let Some(baseline) = self.baseline { - baseline - .try_with_pending_resources_and_retriever_async( - self.pending, - &*retriever, - self.draft, - ) - .await - } else { - Registry::try_from_pending_resources_async_impl(self.pending, &*retriever, self.draft) - .await - } + let mut known_resources = self + .baseline + .map(|b| b.known_resources.clone()) + .unwrap_or_default(); + let mut documents = DocumentStore::new(); + let mut resolution_cache = UriCache::new(); + let (custom_metaschemas, index_data) = build::index_resources_async( + self.pending, + &*retriever, + &mut documents, + &mut known_resources, + &mut resolution_cache, + self.draft, + ) + .await?; + build::validate_custom_metaschemas(&custom_metaschemas, &known_resources)?; + Ok(Registry { + baseline: self.baseline, + resolution_cache: resolution_cache.into_shared(), + known_resources, + index: index_data, + }) } } impl<'a> Registry<'a> { + #[allow(clippy::new_ret_no_self)] + #[must_use] + pub fn new<'b>() -> RegistryBuilder<'b> { + RegistryBuilder::new() + } /// Add a resource to a prepared registry, returning a builder that must be prepared again. 
/// /// # Errors @@ -258,75 +282,9 @@ impl<'a> Registry<'a> { { RegistryBuilder::from_registry(self).extend(pairs) } -} - -impl Registry<'static> { - #[allow(clippy::new_ret_no_self)] - #[must_use] - pub fn new<'a>() -> RegistryBuilder<'a> { - RegistryBuilder::new() - } - - fn try_from_pending_resources_impl<'a>( - pairs: impl IntoIterator, PendingResource<'a>)>, - retriever: &dyn Retrieve, - draft: Option, - ) -> Result, Error> { - let mut documents = DocumentStore::new(); - let mut known_resources = KnownResources::new(); - let mut resolution_cache = UriCache::new(); - - let (custom_metaschemas, index_data) = process_resources_mixed( - pairs, - retriever, - &mut documents, - &mut known_resources, - &mut resolution_cache, - draft, - )?; - - validate_custom_metaschemas(&custom_metaschemas, &known_resources)?; - - Ok(Registry { - baseline: None, - resolution_cache: resolution_cache.into_shared(), - known_resources, - index_data, - }) - } - - #[cfg(feature = "retrieve-async")] - async fn try_from_pending_resources_async_impl<'a>( - pairs: impl IntoIterator, PendingResource<'a>)>, - retriever: &dyn crate::AsyncRetrieve, - draft: Option, - ) -> Result, Error> { - let mut documents = DocumentStore::new(); - let mut known_resources = KnownResources::new(); - let mut resolution_cache = UriCache::new(); - - let (custom_metaschemas, index_data) = process_resources_async_mixed( - pairs, - retriever, - &mut documents, - &mut known_resources, - &mut resolution_cache, - draft, - ) - .await?; - - validate_custom_metaschemas(&custom_metaschemas, &known_resources)?; - - Ok(Registry { - baseline: None, - resolution_cache: resolution_cache.into_shared(), - known_resources, - index_data, - }) - } /// Build a registry with all the given meta-schemas from specs. 
- pub(crate) fn build_from_meta_schemas(schemas: &[(&'static str, &'static Value)]) -> Self { + pub(crate) fn from_meta_schemas(schemas: &[(&'static str, &'static Value)]) -> Self { let mut documents = DocumentStore::with_capacity(schemas.len()); let mut known_resources = KnownResources::with_capacity(schemas.len()); @@ -340,77 +298,17 @@ impl Registry<'static> { } let mut resolution_cache = UriCache::with_capacity(35); - let index_data = build_prepared_index_for_documents(&documents, &mut resolution_cache) - .expect("meta-schema index data must build"); + let index_data = + build::build_prepared_index_for_documents(&documents, &mut resolution_cache) + .expect("meta-schema index data must build"); Self { baseline: None, resolution_cache: resolution_cache.into_shared(), known_resources, - index_data, + index: index_data, } } -} - -impl<'a> Registry<'a> { - fn try_with_pending_resources_and_retriever( - &'a self, - pairs: impl IntoIterator, PendingResource<'a>)>, - retriever: &dyn Retrieve, - draft: Option, - ) -> Result, Error> { - let mut documents = DocumentStore::new(); - let mut resolution_cache = UriCache::new(); - let mut known_resources = self.known_resources.clone(); - - let (custom_metaschemas, index_data) = process_resources_mixed( - pairs, - retriever, - &mut documents, - &mut known_resources, - &mut resolution_cache, - draft, - )?; - validate_custom_metaschemas(&custom_metaschemas, &known_resources)?; - - Ok(Registry { - baseline: Some(self), - resolution_cache: resolution_cache.into_shared(), - known_resources, - index_data, - }) - } - - #[cfg(feature = "retrieve-async")] - async fn try_with_pending_resources_and_retriever_async( - &'a self, - pairs: impl IntoIterator, PendingResource<'a>)>, - retriever: &dyn crate::AsyncRetrieve, - draft: Option, - ) -> Result, Error> { - let mut documents = DocumentStore::new(); - let mut resolution_cache = UriCache::new(); - let mut known_resources = self.known_resources.clone(); - - let (custom_metaschemas, 
index_data) = process_resources_async_mixed( - pairs, - retriever, - &mut documents, - &mut known_resources, - &mut resolution_cache, - draft, - ) - .await?; - validate_custom_metaschemas(&custom_metaschemas, &known_resources)?; - - Ok(Registry { - baseline: Some(self), - resolution_cache: resolution_cache.into_shared(), - known_resources, - index_data, - }) - } - /// Returns `true` if the registry contains a resource at the given URI. /// /// Returns `false` if the URI is malformed. @@ -485,7 +383,7 @@ impl<'a> Registry<'a> { #[inline] pub(crate) fn resource_by_uri(&self, uri: &Uri) -> Option> { - self.index_data + self.index .resources .get(uri) .and_then(IndexedResource::resolve) @@ -496,7 +394,7 @@ impl<'a> Registry<'a> { } pub(crate) fn contains_anchor_uri(&self, uri: &Uri, name: &str) -> bool { - self.index_data + self.index .anchors .get(uri) .is_some_and(|entries| entries.contains_key(name)) @@ -527,7 +425,7 @@ impl<'a> Registry<'a> { } fn local_anchor_by_uri(&self, uri: &Uri, name: &str) -> Option> { - self.index_data + self.index .anchors .get(uri) .and_then(|entries| entries.get(name)) diff --git a/crates/jsonschema-referencing/src/spec/draft201909.rs b/crates/jsonschema-referencing/src/spec/draft201909.rs index caad3f53..00783297 100644 --- a/crates/jsonschema-referencing/src/spec/draft201909.rs +++ b/crates/jsonschema-referencing/src/spec/draft201909.rs @@ -62,6 +62,75 @@ pub(crate) fn object_info(schema: &Map) -> ObjectInfo<'_> { } } +fn visit_child<'a, E>( + key: &'a str, + value: &'a Value, + draft: Draft, + f: &mut impl FnMut(&'a str, Option<&'a str>, Option, &'a Value, Draft) -> Result<(), E>, +) -> Result<(), E> { + match key { + "additionalItems" + | "additionalProperties" + | "contains" + | "contentSchema" + | "else" + | "if" + | "not" + | "propertyNames" + | "then" + | "unevaluatedItems" + | "unevaluatedProperties" => { + f(key, None, None, value, draft.detect(value))?; + } + "allOf" | "anyOf" | "oneOf" => { + if let Some(arr) = 
value.as_array() { + for (index, item) in arr.iter().enumerate() { + f(key, None, Some(index), item, draft.detect(item))?; + } + } + } + "$defs" | "definitions" | "dependentSchemas" | "patternProperties" | "properties" => { + if let Some(obj) = value.as_object() { + for (child_key, child_value) in obj { + f( + key, + Some(child_key.as_str()), + None, + child_value, + draft.detect(child_value), + )?; + } + } + } + "items" => match value { + Value::Array(arr) => { + for (index, item) in arr.iter().enumerate() { + f("items", None, Some(index), item, draft.detect(item))?; + } + } + _ => f("items", None, None, value, draft.detect(value))?, + }, + "dependencies" => { + if let Some(obj) = value.as_object() { + for (child_key, child_value) in obj { + if !child_value.is_object() { + continue; + } + f( + "dependencies", + Some(child_key.as_str()), + None, + child_value, + draft.detect(child_value), + )?; + } + } + } + _ => {} + } + Ok(()) +} + pub(crate) fn scan_borrowed_object_into_scratch_map<'a>( schema: &'a Map, draft: Draft, @@ -80,52 +149,12 @@ pub(crate) fn scan_borrowed_object_into_scratch_map<'a>( references.schema = Some(reference); } } - "additionalItems" - | "additionalProperties" - | "contains" - | "contentSchema" - | "else" - | "if" - | "not" - | "propertyNames" - | "then" - | "unevaluatedItems" - | "unevaluatedProperties" => { - children.push((value, draft.detect(value))); - } - "allOf" | "anyOf" | "oneOf" => { - if let Some(arr) = value.as_array() { - for item in arr { - children.push((item, draft.detect(item))); - } - } - } - "$defs" | "definitions" | "dependentSchemas" | "patternProperties" | "properties" => { - if let Some(obj) = value.as_object() { - for child_value in obj.values() { - children.push((child_value, draft.detect(child_value))); - } - } + other => { + let _ = visit_child(other, value, draft, &mut |_kw, _nk, _idx, v, d| { + children.push((v, d)); + Ok::<(), std::convert::Infallible>(()) + }); } - "items" => match value { - Value::Array(arr) => 
{ - for item in arr { - children.push((item, draft.detect(item))); - } - } - _ => children.push((value, draft.detect(value))), - }, - "dependencies" => { - if let Some(obj) = value.as_object() { - for child_value in obj.values() { - if !child_value.is_object() { - continue; - } - children.push((child_value, draft.detect(child_value))); - } - } - } - _ => {} } } } @@ -153,72 +182,16 @@ pub(crate) fn scan_owned_object_into_scratch_map<'a>( references.schema = Some(reference); } } - "additionalItems" - | "additionalProperties" - | "contains" - | "contentSchema" - | "else" - | "if" - | "not" - | "propertyNames" - | "then" - | "unevaluatedItems" - | "unevaluatedProperties" => { - children.push(ChildNode::key(key.as_str(), value, draft.detect(value))); - } - "allOf" | "anyOf" | "oneOf" => { - if let Some(arr) = value.as_array() { - for (index, item) in arr.iter().enumerate() { - children.push(ChildNode::key_index( - key.as_str(), - index, - item, - draft.detect(item), - )); - } - } - } - "$defs" | "definitions" | "dependentSchemas" | "patternProperties" | "properties" => { - if let Some(obj) = value.as_object() { - for (child_key, child_value) in obj { - children.push(ChildNode::key_key( - key.as_str(), - child_key.as_str(), - child_value, - draft.detect(child_value), - )); - } - } - } - "items" => match value { - Value::Array(arr) => { - for (index, item) in arr.iter().enumerate() { - children.push(ChildNode::key_index( - "items", - index, - item, - draft.detect(item), - )); + other => { + let _ = visit_child(other, value, draft, &mut |kw, nk, idx, v, d| { + match (nk, idx) { + (Some(k), _) => children.push(ChildNode::key_key(kw, k, v, d)), + (_, Some(i)) => children.push(ChildNode::key_index(kw, i, v, d)), + _ => children.push(ChildNode::key(kw, v, d)), } - } - _ => children.push(ChildNode::key("items", value, draft.detect(value))), - }, - "dependencies" => { - if let Some(obj) = value.as_object() { - for (child_key, child_value) in obj { - if !child_value.is_object() 
{ - continue; - } - children.push(ChildNode::key_key( - key.as_str(), - child_key.as_str(), - child_value, - draft.detect(child_value), - )); - } - } + Ok::<(), std::convert::Infallible>(()) + }); } - _ => {} } } @@ -234,52 +207,9 @@ where F: FnMut(&'a Value, Draft) -> Result<(), E>, { for (key, value) in schema { - match key.as_str() { - "additionalItems" - | "additionalProperties" - | "contains" - | "contentSchema" - | "else" - | "if" - | "not" - | "propertyNames" - | "then" - | "unevaluatedItems" - | "unevaluatedProperties" => f(value, draft.detect(value))?, - "allOf" | "anyOf" | "oneOf" => { - if let Some(arr) = value.as_array() { - for item in arr { - f(item, draft.detect(item))?; - } - } - } - "$defs" | "definitions" | "dependentSchemas" | "patternProperties" | "properties" => { - if let Some(obj) = value.as_object() { - for child_value in obj.values() { - f(child_value, draft.detect(child_value))?; - } - } - } - "items" => match value { - Value::Array(arr) => { - for item in arr { - f(item, draft.detect(item))?; - } - } - _ => f(value, draft.detect(value))?, - }, - "dependencies" => { - if let Some(obj) = value.as_object() { - for child_value in obj.values() { - if !child_value.is_object() { - continue; - } - f(child_value, draft.detect(child_value))?; - } - } - } - _ => {} - } + visit_child(key.as_str(), value, draft, &mut |_kw, _nk, _idx, v, d| { + f(v, d) + })?; } Ok(()) } @@ -294,65 +224,20 @@ where F: FnMut(&JsonPointerNode<'_, '_>, &'a Value, Draft) -> Result<(), E>, { for (key, value) in schema { - match key.as_str() { - "additionalItems" - | "additionalProperties" - | "contains" - | "contentSchema" - | "else" - | "if" - | "not" - | "propertyNames" - | "then" - | "unevaluatedItems" - | "unevaluatedProperties" => { - let child_path = path.push(key.as_str()); - f(&child_path, value, draft.detect(value))?; - } - "allOf" | "anyOf" | "oneOf" => { - if let Some(arr) = value.as_array() { - let parent_path = path.push(key.as_str()); - for (i, item) in 
arr.iter().enumerate() { - let child_path = parent_path.push(i); - f(&child_path, item, draft.detect(item))?; - } + visit_child(key.as_str(), value, draft, &mut |kw, nk, idx, v, d| { + let parent = path.push(kw); + match (nk, idx) { + (Some(k), _) => { + let child_path = parent.push(k); + f(&child_path, v, d) } - } - "$defs" | "definitions" | "dependentSchemas" | "patternProperties" | "properties" => { - if let Some(obj) = value.as_object() { - let parent_path = path.push(key.as_str()); - for (child_key, child_value) in obj { - let child_path = parent_path.push(child_key.as_str()); - f(&child_path, child_value, draft.detect(child_value))?; - } + (_, Some(i)) => { + let child_path = parent.push(i); + f(&child_path, v, d) } + _ => f(&parent, v, d), } - "items" => { - let parent_path = path.push("items"); - match value { - Value::Array(arr) => { - for (i, item) in arr.iter().enumerate() { - let child_path = parent_path.push(i); - f(&child_path, item, draft.detect(item))?; - } - } - _ => f(&parent_path, value, draft.detect(value))?, - } - } - "dependencies" => { - if let Some(obj) = value.as_object() { - let parent_path = path.push(key.as_str()); - for (child_key, child_value) in obj { - if !child_value.is_object() { - continue; - } - let child_path = parent_path.push(child_key.as_str()); - f(&child_path, child_value, draft.detect(child_value))?; - } - } - } - _ => {} - } + })?; } Ok(()) } diff --git a/crates/jsonschema-referencing/src/spec/draft202012.rs b/crates/jsonschema-referencing/src/spec/draft202012.rs index d5460109..894fa04e 100644 --- a/crates/jsonschema-referencing/src/spec/draft202012.rs +++ b/crates/jsonschema-referencing/src/spec/draft202012.rs @@ -10,6 +10,51 @@ use crate::{ Error, JsonPointerNode, Resolver, ResourceRef, Segments, }; +fn visit_child<'a, E>( + key: &'a str, + value: &'a Value, + draft: Draft, + f: &mut impl FnMut(&'a str, Option<&'a str>, Option, &'a Value, Draft) -> Result<(), E>, +) -> Result<(), E> { + match key { + 
"additionalProperties" + | "contains" + | "contentSchema" + | "else" + | "if" + | "items" + | "not" + | "propertyNames" + | "then" + | "unevaluatedItems" + | "unevaluatedProperties" => { + f(key, None, None, value, draft.detect(value))?; + } + "allOf" | "anyOf" | "oneOf" | "prefixItems" => { + if let Some(arr) = value.as_array() { + for (index, item) in arr.iter().enumerate() { + f(key, None, Some(index), item, draft.detect(item))?; + } + } + } + "$defs" | "definitions" | "dependentSchemas" | "patternProperties" | "properties" => { + if let Some(obj) = value.as_object() { + for (child_key, child_value) in obj { + f( + key, + Some(child_key.as_str()), + None, + child_value, + draft.detect(child_value), + )?; + } + } + } + _ => {} + } + Ok(()) +} + pub(crate) fn scan_borrowed_object_into_scratch_map<'a>( schema: &'a Map, draft: Draft, @@ -28,34 +73,12 @@ pub(crate) fn scan_borrowed_object_into_scratch_map<'a>( references.schema = Some(reference); } } - "additionalProperties" - | "contains" - | "contentSchema" - | "else" - | "if" - | "items" - | "not" - | "propertyNames" - | "then" - | "unevaluatedItems" - | "unevaluatedProperties" => { - children.push((value, draft.detect(value))); - } - "allOf" | "anyOf" | "oneOf" | "prefixItems" => { - if let Some(arr) = value.as_array() { - for item in arr { - children.push((item, draft.detect(item))); - } - } + other => { + let _ = visit_child(other, value, draft, &mut |_kw, _nk, _idx, v, d| { + children.push((v, d)); + Ok::<(), std::convert::Infallible>(()) + }); } - "$defs" | "definitions" | "dependentSchemas" | "patternProperties" | "properties" => { - if let Some(obj) = value.as_object() { - for child_value in obj.values() { - children.push((child_value, draft.detect(child_value))); - } - } - } - _ => {} } } } @@ -126,44 +149,16 @@ pub(crate) fn scan_owned_object_into_scratch_map<'a>( references.schema = Some(reference); } } - "additionalProperties" - | "contains" - | "contentSchema" - | "else" - | "if" - | "items" - | "not" 
- | "propertyNames" - | "then" - | "unevaluatedItems" - | "unevaluatedProperties" => { - children.push(ChildNode::key(key.as_str(), value, draft.detect(value))); - } - "allOf" | "anyOf" | "oneOf" | "prefixItems" => { - if let Some(arr) = value.as_array() { - for (index, item) in arr.iter().enumerate() { - children.push(ChildNode::key_index( - key.as_str(), - index, - item, - draft.detect(item), - )); + other => { + let _ = visit_child(other, value, draft, &mut |kw, nk, idx, v, d| { + match (nk, idx) { + (Some(k), _) => children.push(ChildNode::key_key(kw, k, v, d)), + (_, Some(i)) => children.push(ChildNode::key_index(kw, i, v, d)), + _ => children.push(ChildNode::key(kw, v, d)), } - } + Ok::<(), std::convert::Infallible>(()) + }); } - "$defs" | "definitions" | "dependentSchemas" | "patternProperties" | "properties" => { - if let Some(obj) = value.as_object() { - for (child_key, child_value) in obj { - children.push(ChildNode::key_key( - key.as_str(), - child_key.as_str(), - child_value, - draft.detect(child_value), - )); - } - } - } - _ => {} } } @@ -180,41 +175,20 @@ where F: FnMut(&JsonPointerNode<'_, '_>, &'a Value, Draft) -> Result<(), E>, { for (key, value) in schema { - match key.as_str() { - "additionalProperties" - | "contains" - | "contentSchema" - | "else" - | "if" - | "items" - | "not" - | "propertyNames" - | "then" - | "unevaluatedItems" - | "unevaluatedProperties" => { - let child_path = path.push(key.as_str()); - f(&child_path, value, draft.detect(value))?; - } - "allOf" | "anyOf" | "oneOf" | "prefixItems" => { - if let Some(arr) = value.as_array() { - let parent_path = path.push(key.as_str()); - for (i, item) in arr.iter().enumerate() { - let child_path = parent_path.push(i); - f(&child_path, item, draft.detect(item))?; - } + visit_child(key.as_str(), value, draft, &mut |kw, nk, idx, v, d| { + let parent = path.push(kw); + match (nk, idx) { + (Some(k), _) => { + let child_path = parent.push(k); + f(&child_path, v, d) } - } - "$defs" | "definitions" 
| "dependentSchemas" | "patternProperties" | "properties" => { - if let Some(obj) = value.as_object() { - let parent_path = path.push(key.as_str()); - for (child_key, child_value) in obj { - let child_path = parent_path.push(child_key.as_str()); - f(&child_path, child_value, draft.detect(child_value))?; - } + (_, Some(i)) => { + let child_path = parent.push(i); + f(&child_path, v, d) } + _ => f(&parent, v, d), } - _ => {} - } + })?; } Ok(()) } @@ -228,34 +202,9 @@ where F: FnMut(&'a Value, Draft) -> Result<(), E>, { for (key, value) in schema { - match key.as_str() { - "additionalProperties" - | "contains" - | "contentSchema" - | "else" - | "if" - | "items" - | "not" - | "propertyNames" - | "then" - | "unevaluatedItems" - | "unevaluatedProperties" => f(value, draft.detect(value))?, - "allOf" | "anyOf" | "oneOf" | "prefixItems" => { - if let Some(arr) = value.as_array() { - for item in arr { - f(item, draft.detect(item))?; - } - } - } - "$defs" | "definitions" | "dependentSchemas" | "patternProperties" | "properties" => { - if let Some(obj) = value.as_object() { - for child_value in obj.values() { - f(child_value, draft.detect(child_value))?; - } - } - } - _ => {} - } + visit_child(key.as_str(), value, draft, &mut |_kw, _nk, _idx, v, d| { + f(v, d) + })?; } Ok(()) } diff --git a/crates/jsonschema-referencing/src/spec/draft4.rs b/crates/jsonschema-referencing/src/spec/draft4.rs index 1e1ff784..ac5e7966 100644 --- a/crates/jsonschema-referencing/src/spec/draft4.rs +++ b/crates/jsonschema-referencing/src/spec/draft4.rs @@ -65,6 +65,76 @@ pub(crate) fn object_info(schema: &Map) -> ObjectInfo<'_> { } } +fn visit_child<'a, E>( + key: &'a str, + value: &'a Value, + draft: Draft, + f: &mut impl FnMut(&'a str, Option<&'a str>, Option, &'a Value, Draft) -> Result<(), E>, +) -> Result<(), E> { + match key { + "additionalItems" | "additionalProperties" if value.is_object() => { + f(key, None, None, value, draft.detect(value))?; + } + "contains" + | "contentSchema" + | "else" + 
| "if" + | "not" + | "propertyNames" + | "then" + | "unevaluatedItems" + | "unevaluatedProperties" => { + f(key, None, None, value, draft.detect(value))?; + } + "allOf" | "anyOf" | "oneOf" | "prefixItems" => { + if let Some(arr) = value.as_array() { + for (index, item) in arr.iter().enumerate() { + f(key, None, Some(index), item, draft.detect(item))?; + } + } + } + "$defs" | "definitions" | "dependentSchemas" | "patternProperties" | "properties" => { + if let Some(obj) = value.as_object() { + for (child_key, child_value) in obj { + f( + key, + Some(child_key.as_str()), + None, + child_value, + draft.detect(child_value), + )?; + } + } + } + "items" => match value { + Value::Array(arr) => { + for (index, item) in arr.iter().enumerate() { + f(key, None, Some(index), item, draft.detect(item))?; + } + } + _ => f(key, None, None, value, draft.detect(value))?, + }, + "dependencies" => { + if let Some(obj) = value.as_object() { + for (child_key, child_value) in obj { + if !child_value.is_object() { + continue; + } + f( + key, + Some(child_key.as_str()), + None, + child_value, + draft.detect(child_value), + )?; + } + } + } + _ => {} + } + Ok(()) +} + pub(crate) fn scan_borrowed_object_into_scratch_map<'a>( schema: &'a Map, draft: Draft, @@ -83,53 +153,12 @@ pub(crate) fn scan_borrowed_object_into_scratch_map<'a>( references.schema = Some(reference); } } - "additionalItems" | "additionalProperties" if value.is_object() => { - children.push((value, draft.detect(value))); - } - "contains" - | "contentSchema" - | "else" - | "if" - | "not" - | "propertyNames" - | "then" - | "unevaluatedItems" - | "unevaluatedProperties" => { - children.push((value, draft.detect(value))); - } - "allOf" | "anyOf" | "oneOf" | "prefixItems" => { - if let Some(arr) = value.as_array() { - for item in arr { - children.push((item, draft.detect(item))); - } - } - } - "$defs" | "definitions" | "dependentSchemas" | "patternProperties" | "properties" => { - if let Some(obj) = value.as_object() { - for 
child_value in obj.values() { - children.push((child_value, draft.detect(child_value))); - } - } + other => { + let _ = visit_child(other, value, draft, &mut |_kw, _nk, _idx, v, d| { + children.push((v, d)); + Ok::<(), std::convert::Infallible>(()) + }); } - "items" => match value { - Value::Array(arr) => { - for item in arr { - children.push((item, draft.detect(item))); - } - } - _ => children.push((value, draft.detect(value))), - }, - "dependencies" => { - if let Some(obj) = value.as_object() { - for child_value in obj.values() { - if !child_value.is_object() { - continue; - } - children.push((child_value, draft.detect(child_value))); - } - } - } - _ => {} } } } @@ -157,73 +186,16 @@ pub(crate) fn scan_owned_object_into_scratch_map<'a>( references.schema = Some(reference); } } - "additionalItems" | "additionalProperties" if value.is_object() => { - children.push(ChildNode::key(key.as_str(), value, draft.detect(value))); - } - "contains" - | "contentSchema" - | "else" - | "if" - | "not" - | "propertyNames" - | "then" - | "unevaluatedItems" - | "unevaluatedProperties" => { - children.push(ChildNode::key(key.as_str(), value, draft.detect(value))); - } - "allOf" | "anyOf" | "oneOf" | "prefixItems" => { - if let Some(arr) = value.as_array() { - for (index, item) in arr.iter().enumerate() { - children.push(ChildNode::key_index( - key.as_str(), - index, - item, - draft.detect(item), - )); - } - } - } - "$defs" | "definitions" | "dependentSchemas" | "patternProperties" | "properties" => { - if let Some(obj) = value.as_object() { - for (child_key, child_value) in obj { - children.push(ChildNode::key_key( - key.as_str(), - child_key.as_str(), - child_value, - draft.detect(child_value), - )); - } - } - } - "items" => match value { - Value::Array(arr) => { - for (index, item) in arr.iter().enumerate() { - children.push(ChildNode::key_index( - "items", - index, - item, - draft.detect(item), - )); + other => { + let _ = visit_child(other, value, draft, &mut |kw, nk, idx, v, d| 
{ + match (nk, idx) { + (Some(k), _) => children.push(ChildNode::key_key(kw, k, v, d)), + (_, Some(i)) => children.push(ChildNode::key_index(kw, i, v, d)), + _ => children.push(ChildNode::key(kw, v, d)), } - } - _ => children.push(ChildNode::key("items", value, draft.detect(value))), - }, - "dependencies" => { - if let Some(obj) = value.as_object() { - for (child_key, child_value) in obj { - if !child_value.is_object() { - continue; - } - children.push(ChildNode::key_key( - key.as_str(), - child_key.as_str(), - child_value, - draft.detect(child_value), - )); - } - } + Ok::<(), std::convert::Infallible>(()) + }); } - _ => {} } } @@ -244,55 +216,9 @@ where F: FnMut(&'a Value, Draft) -> Result<(), E>, { for (key, value) in schema { - match key.as_str() { - "additionalItems" | "additionalProperties" if value.is_object() => { - f(value, draft.detect(value))?; - } - "contains" - | "contentSchema" - | "else" - | "if" - | "not" - | "propertyNames" - | "then" - | "unevaluatedItems" - | "unevaluatedProperties" => { - f(value, draft.detect(value))?; - } - "allOf" | "anyOf" | "oneOf" | "prefixItems" => { - if let Some(arr) = value.as_array() { - for item in arr { - f(item, draft.detect(item))?; - } - } - } - "$defs" | "definitions" | "dependentSchemas" | "patternProperties" | "properties" => { - if let Some(obj) = value.as_object() { - for child_value in obj.values() { - f(child_value, draft.detect(child_value))?; - } - } - } - "items" => match value { - Value::Array(arr) => { - for item in arr { - f(item, draft.detect(item))?; - } - } - _ => f(value, draft.detect(value))?, - }, - "dependencies" => { - if let Some(obj) = value.as_object() { - for child_value in obj.values() { - if !child_value.is_object() { - continue; - } - f(child_value, draft.detect(child_value))?; - } - } - } - _ => {} - } + visit_child(key.as_str(), value, draft, &mut |_kw, _nk, _idx, v, d| { + f(v, d) + })?; } Ok(()) } @@ -307,67 +233,20 @@ where F: FnMut(&JsonPointerNode<'_, '_>, &'a Value, Draft) -> 
Result<(), E>, { for (key, value) in schema { - match key.as_str() { - "additionalItems" | "additionalProperties" if value.is_object() => { - let child_path = path.push(key.as_str()); - f(&child_path, value, draft.detect(value))?; - } - "contains" - | "contentSchema" - | "else" - | "if" - | "not" - | "propertyNames" - | "then" - | "unevaluatedItems" - | "unevaluatedProperties" => { - let child_path = path.push(key.as_str()); - f(&child_path, value, draft.detect(value))?; - } - "allOf" | "anyOf" | "oneOf" | "prefixItems" => { - if let Some(arr) = value.as_array() { - let parent_path = path.push(key.as_str()); - for (i, item) in arr.iter().enumerate() { - let child_path = parent_path.push(i); - f(&child_path, item, draft.detect(item))?; - } + visit_child(key.as_str(), value, draft, &mut |kw, nk, idx, v, d| { + let parent = path.push(kw); + match (nk, idx) { + (Some(k), _) => { + let child_path = parent.push(k); + f(&child_path, v, d) } - } - "$defs" | "definitions" | "dependentSchemas" | "patternProperties" | "properties" => { - if let Some(obj) = value.as_object() { - let parent_path = path.push(key.as_str()); - for (child_key, child_value) in obj { - let child_path = parent_path.push(child_key.as_str()); - f(&child_path, child_value, draft.detect(child_value))?; - } + (_, Some(i)) => { + let child_path = parent.push(i); + f(&child_path, v, d) } + _ => f(&parent, v, d), } - "items" => { - let parent_path = path.push(key.as_str()); - match value { - Value::Array(arr) => { - for (i, item) in arr.iter().enumerate() { - let child_path = parent_path.push(i); - f(&child_path, item, draft.detect(item))?; - } - } - _ => f(&parent_path, value, draft.detect(value))?, - } - } - "dependencies" => { - if let Some(obj) = value.as_object() { - let parent_path = path.push(key.as_str()); - for (child_key, child_value) in obj { - if !child_value.is_object() { - continue; - } - let child_path = parent_path.push(child_key.as_str()); - f(&child_path, child_value, 
draft.detect(child_value))?; - } - } - } - _ => {} - } + })?; } Ok(()) } diff --git a/crates/jsonschema-referencing/src/spec/draft6.rs b/crates/jsonschema-referencing/src/spec/draft6.rs index eaf0af52..e0347bb1 100644 --- a/crates/jsonschema-referencing/src/spec/draft6.rs +++ b/crates/jsonschema-referencing/src/spec/draft6.rs @@ -58,6 +58,65 @@ pub(crate) fn object_info(schema: &Map) -> ObjectInfo<'_> { } } +fn visit_child<'a, E>( + key: &'a str, + value: &'a Value, + draft: Draft, + f: &mut impl FnMut(&'a str, Option<&'a str>, Option, &'a Value, Draft) -> Result<(), E>, +) -> Result<(), E> { + match key { + "additionalItems" | "additionalProperties" | "contains" | "not" | "propertyNames" => { + f(key, None, None, value, draft.detect(value))?; + } + "allOf" | "anyOf" | "oneOf" => { + if let Some(arr) = value.as_array() { + for (index, item) in arr.iter().enumerate() { + f(key, None, Some(index), item, draft.detect(item))?; + } + } + } + "definitions" | "patternProperties" | "properties" => { + if let Some(obj) = value.as_object() { + for (child_key, child_value) in obj { + f( + key, + Some(child_key.as_str()), + None, + child_value, + draft.detect(child_value), + )?; + } + } + } + "items" => match value { + Value::Array(arr) => { + for (index, item) in arr.iter().enumerate() { + f("items", None, Some(index), item, draft.detect(item))?; + } + } + _ => f("items", None, None, value, draft.detect(value))?, + }, + "dependencies" => { + if let Some(obj) = value.as_object() { + for (child_key, child_value) in obj { + if !child_value.is_object() { + continue; + } + f( + "dependencies", + Some(child_key.as_str()), + None, + child_value, + draft.detect(child_value), + )?; + } + } + } + _ => {} + } + Ok(()) +} + pub(crate) fn scan_borrowed_object_into_scratch_map<'a>( schema: &'a Map, draft: Draft, @@ -76,42 +135,12 @@ pub(crate) fn scan_borrowed_object_into_scratch_map<'a>( references.schema = Some(reference); } } - "additionalItems" | "additionalProperties" | "contains" | 
"not" | "propertyNames" => { - children.push((value, draft.detect(value))); - } - "allOf" | "anyOf" | "oneOf" => { - if let Some(arr) = value.as_array() { - for item in arr { - children.push((item, draft.detect(item))); - } - } - } - "definitions" | "patternProperties" | "properties" => { - if let Some(obj) = value.as_object() { - for child_value in obj.values() { - children.push((child_value, draft.detect(child_value))); - } - } - } - "items" => match value { - Value::Array(arr) => { - for item in arr { - children.push((item, draft.detect(item))); - } - } - _ => children.push((value, draft.detect(value))), - }, - "dependencies" => { - if let Some(obj) = value.as_object() { - for child_value in obj.values() { - if !child_value.is_object() { - continue; - } - children.push((child_value, draft.detect(child_value))); - } - } + other => { + let _ = visit_child(other, value, draft, &mut |_kw, _nk, _idx, v, d| { + children.push((v, d)); + Ok::<(), std::convert::Infallible>(()) + }); } - _ => {} } } } @@ -139,62 +168,16 @@ pub(crate) fn scan_owned_object_into_scratch_map<'a>( references.schema = Some(reference); } } - "additionalItems" | "additionalProperties" | "contains" | "not" | "propertyNames" => { - children.push(ChildNode::key(key.as_str(), value, draft.detect(value))); - } - "allOf" | "anyOf" | "oneOf" => { - if let Some(arr) = value.as_array() { - for (index, item) in arr.iter().enumerate() { - children.push(ChildNode::key_index( - key.as_str(), - index, - item, - draft.detect(item), - )); - } - } - } - "definitions" | "patternProperties" | "properties" => { - if let Some(obj) = value.as_object() { - for (child_key, child_value) in obj { - children.push(ChildNode::key_key( - key.as_str(), - child_key.as_str(), - child_value, - draft.detect(child_value), - )); - } - } - } - "items" => match value { - Value::Array(arr) => { - for (index, item) in arr.iter().enumerate() { - children.push(ChildNode::key_index( - "items", - index, - item, - draft.detect(item), - )); - 
} - } - _ => children.push(ChildNode::key("items", value, draft.detect(value))), - }, - "dependencies" => { - if let Some(obj) = value.as_object() { - for (child_key, child_value) in obj { - if !child_value.is_object() { - continue; - } - children.push(ChildNode::key_key( - key.as_str(), - child_key.as_str(), - child_value, - draft.detect(child_value), - )); + other => { + let _ = visit_child(other, value, draft, &mut |kw, nk, idx, v, d| { + match (nk, idx) { + (Some(k), _) => children.push(ChildNode::key_key(kw, k, v, d)), + (_, Some(i)) => children.push(ChildNode::key_index(kw, i, v, d)), + _ => children.push(ChildNode::key(kw, v, d)), } - } + Ok::<(), std::convert::Infallible>(()) + }); } - _ => {} } } @@ -203,6 +186,7 @@ pub(crate) fn scan_owned_object_into_scratch_map<'a>( Some(id) if !has_anchor && !has_ref => Some(id), _ => None, }; + (id, has_anchor) } @@ -215,44 +199,9 @@ where F: FnMut(&'a Value, Draft) -> Result<(), E>, { for (key, value) in schema { - match key.as_str() { - "additionalItems" | "additionalProperties" | "contains" | "not" | "propertyNames" => { - f(value, draft.detect(value))?; - } - "allOf" | "anyOf" | "oneOf" => { - if let Some(arr) = value.as_array() { - for item in arr { - f(item, draft.detect(item))?; - } - } - } - "definitions" | "patternProperties" | "properties" => { - if let Some(obj) = value.as_object() { - for child_value in obj.values() { - f(child_value, draft.detect(child_value))?; - } - } - } - "items" => match value { - Value::Array(arr) => { - for item in arr { - f(item, draft.detect(item))?; - } - } - _ => f(value, draft.detect(value))?, - }, - "dependencies" => { - if let Some(obj) = value.as_object() { - for child_value in obj.values() { - if !child_value.is_object() { - continue; - } - f(child_value, draft.detect(child_value))?; - } - } - } - _ => {} - } + visit_child(key.as_str(), value, draft, &mut |_kw, _nk, _idx, v, d| { + f(v, d) + })?; } Ok(()) } @@ -267,55 +216,20 @@ where F: FnMut(&JsonPointerNode<'_, '_>, &'a 
Value, Draft) -> Result<(), E>, { for (key, value) in schema { - match key.as_str() { - "additionalItems" | "additionalProperties" | "contains" | "not" | "propertyNames" => { - let child_path = path.push(key.as_str()); - f(&child_path, value, draft.detect(value))?; - } - "allOf" | "anyOf" | "oneOf" => { - if let Some(arr) = value.as_array() { - let parent_path = path.push(key.as_str()); - for (i, item) in arr.iter().enumerate() { - let child_path = parent_path.push(i); - f(&child_path, item, draft.detect(item))?; - } - } - } - "definitions" | "patternProperties" | "properties" => { - if let Some(obj) = value.as_object() { - let parent_path = path.push(key.as_str()); - for (child_key, child_value) in obj { - let child_path = parent_path.push(child_key.as_str()); - f(&child_path, child_value, draft.detect(child_value))?; - } - } - } - "items" => { - let parent_path = path.push("items"); - match value { - Value::Array(arr) => { - for (i, item) in arr.iter().enumerate() { - let child_path = parent_path.push(i); - f(&child_path, item, draft.detect(item))?; - } - } - _ => f(&parent_path, value, draft.detect(value))?, + visit_child(key.as_str(), value, draft, &mut |kw, nk, idx, v, d| { + let parent = path.push(kw); + match (nk, idx) { + (Some(k), _) => { + let child_path = parent.push(k); + f(&child_path, v, d) } - } - "dependencies" => { - if let Some(obj) = value.as_object() { - let parent_path = path.push(key.as_str()); - for (child_key, child_value) in obj { - if !child_value.is_object() { - continue; - } - let child_path = parent_path.push(child_key.as_str()); - f(&child_path, child_value, draft.detect(child_value))?; - } + (_, Some(i)) => { + let child_path = parent.push(i); + f(&child_path, v, d) } + _ => f(&parent, v, d), } - _ => {} - } + })?; } Ok(()) } diff --git a/crates/jsonschema-referencing/src/spec/draft7.rs b/crates/jsonschema-referencing/src/spec/draft7.rs index ee9ea680..15b3b0c6 100644 --- a/crates/jsonschema-referencing/src/spec/draft7.rs +++ 
b/crates/jsonschema-referencing/src/spec/draft7.rs @@ -63,6 +63,72 @@ pub(crate) fn object_info(schema: &Map) -> ObjectInfo<'_> { } } +fn visit_child<'a, E>( + key: &'a str, + value: &'a Value, + draft: Draft, + f: &mut impl FnMut(&'a str, Option<&'a str>, Option, &'a Value, Draft) -> Result<(), E>, +) -> Result<(), E> { + match key { + "additionalItems" + | "additionalProperties" + | "contains" + | "else" + | "if" + | "not" + | "propertyNames" + | "then" => { + f(key, None, None, value, draft.detect(value))?; + } + "allOf" | "anyOf" | "oneOf" => { + if let Some(arr) = value.as_array() { + for (index, item) in arr.iter().enumerate() { + f(key, None, Some(index), item, draft.detect(item))?; + } + } + } + "definitions" | "patternProperties" | "properties" => { + if let Some(obj) = value.as_object() { + for (child_key, child_value) in obj { + f( + key, + Some(child_key.as_str()), + None, + child_value, + draft.detect(child_value), + )?; + } + } + } + "items" => match value { + Value::Array(arr) => { + for (index, item) in arr.iter().enumerate() { + f("items", None, Some(index), item, draft.detect(item))?; + } + } + _ => f("items", None, None, value, draft.detect(value))?, + }, + "dependencies" => { + if let Some(obj) = value.as_object() { + for (child_key, child_value) in obj { + if !child_value.is_object() { + continue; + } + f( + "dependencies", + Some(child_key.as_str()), + None, + child_value, + draft.detect(child_value), + )?; + } + } + } + _ => {} + } + Ok(()) +} + pub(crate) fn scan_borrowed_object_into_scratch_map<'a>( schema: &'a Map, draft: Draft, @@ -81,49 +147,12 @@ pub(crate) fn scan_borrowed_object_into_scratch_map<'a>( references.schema = Some(reference); } } - "additionalItems" - | "additionalProperties" - | "contains" - | "else" - | "if" - | "not" - | "propertyNames" - | "then" => { - children.push((value, draft.detect(value))); - } - "allOf" | "anyOf" | "oneOf" => { - if let Some(arr) = value.as_array() { - for item in arr { - children.push((item, 
draft.detect(item))); - } - } - } - "definitions" | "patternProperties" | "properties" => { - if let Some(obj) = value.as_object() { - for child_value in obj.values() { - children.push((child_value, draft.detect(child_value))); - } - } + other => { + let _ = visit_child(other, value, draft, &mut |_kw, _nk, _idx, v, d| { + children.push((v, d)); + Ok::<(), std::convert::Infallible>(()) + }); } - "items" => match value { - Value::Array(arr) => { - for item in arr { - children.push((item, draft.detect(item))); - } - } - _ => children.push((value, draft.detect(value))), - }, - "dependencies" => { - if let Some(obj) = value.as_object() { - for child_value in obj.values() { - if !child_value.is_object() { - continue; - } - children.push((child_value, draft.detect(child_value))); - } - } - } - _ => {} } } } @@ -151,69 +180,16 @@ pub(crate) fn scan_owned_object_into_scratch_map<'a>( references.schema = Some(reference); } } - "additionalItems" - | "additionalProperties" - | "contains" - | "else" - | "if" - | "not" - | "propertyNames" - | "then" => { - children.push(ChildNode::key(key.as_str(), value, draft.detect(value))); - } - "allOf" | "anyOf" | "oneOf" => { - if let Some(arr) = value.as_array() { - for (index, item) in arr.iter().enumerate() { - children.push(ChildNode::key_index( - key.as_str(), - index, - item, - draft.detect(item), - )); - } - } - } - "definitions" | "patternProperties" | "properties" => { - if let Some(obj) = value.as_object() { - for (child_key, child_value) in obj { - children.push(ChildNode::key_key( - key.as_str(), - child_key.as_str(), - child_value, - draft.detect(child_value), - )); - } - } - } - "items" => match value { - Value::Array(arr) => { - for (index, item) in arr.iter().enumerate() { - children.push(ChildNode::key_index( - "items", - index, - item, - draft.detect(item), - )); + other => { + let _ = visit_child(other, value, draft, &mut |kw, nk, idx, v, d| { + match (nk, idx) { + (Some(k), _) => children.push(ChildNode::key_key(kw, k, 
v, d)), + (_, Some(i)) => children.push(ChildNode::key_index(kw, i, v, d)), + _ => children.push(ChildNode::key(kw, v, d)), } - } - _ => children.push(ChildNode::key("items", value, draft.detect(value))), - }, - "dependencies" => { - if let Some(obj) = value.as_object() { - for (child_key, child_value) in obj { - if !child_value.is_object() { - continue; - } - children.push(ChildNode::key_key( - key.as_str(), - child_key.as_str(), - child_value, - draft.detect(child_value), - )); - } - } + Ok::<(), std::convert::Infallible>(()) + }); } - _ => {} } } @@ -222,6 +198,7 @@ pub(crate) fn scan_owned_object_into_scratch_map<'a>( Some(id) if !has_anchor && !has_ref => Some(id), _ => None, }; + (id, has_anchor) } @@ -234,49 +211,9 @@ where F: FnMut(&'a Value, Draft) -> Result<(), E>, { for (key, value) in schema { - match key.as_str() { - "additionalItems" - | "additionalProperties" - | "contains" - | "else" - | "if" - | "not" - | "propertyNames" - | "then" => f(value, draft.detect(value))?, - "allOf" | "anyOf" | "oneOf" => { - if let Some(arr) = value.as_array() { - for item in arr { - f(item, draft.detect(item))?; - } - } - } - "definitions" | "patternProperties" | "properties" => { - if let Some(obj) = value.as_object() { - for child_value in obj.values() { - f(child_value, draft.detect(child_value))?; - } - } - } - "items" => match value { - Value::Array(arr) => { - for item in arr { - f(item, draft.detect(item))?; - } - } - _ => f(value, draft.detect(value))?, - }, - "dependencies" => { - if let Some(obj) = value.as_object() { - for child_value in obj.values() { - if !child_value.is_object() { - continue; - } - f(child_value, draft.detect(child_value))?; - } - } - } - _ => {} - } + visit_child(key.as_str(), value, draft, &mut |_kw, _nk, _idx, v, d| { + f(v, d) + })?; } Ok(()) } @@ -291,62 +228,20 @@ where F: FnMut(&JsonPointerNode<'_, '_>, &'a Value, Draft) -> Result<(), E>, { for (key, value) in schema { - match key.as_str() { - "additionalItems" - | 
"additionalProperties" - | "contains" - | "else" - | "if" - | "not" - | "propertyNames" - | "then" => { - let child_path = path.push(key.as_str()); - f(&child_path, value, draft.detect(value))?; - } - "allOf" | "anyOf" | "oneOf" => { - if let Some(arr) = value.as_array() { - let parent_path = path.push(key.as_str()); - for (i, item) in arr.iter().enumerate() { - let child_path = parent_path.push(i); - f(&child_path, item, draft.detect(item))?; - } + visit_child(key.as_str(), value, draft, &mut |kw, nk, idx, v, d| { + let parent = path.push(kw); + match (nk, idx) { + (Some(k), _) => { + let child_path = parent.push(k); + f(&child_path, v, d) } - } - "definitions" | "patternProperties" | "properties" => { - if let Some(obj) = value.as_object() { - let parent_path = path.push(key.as_str()); - for (child_key, child_value) in obj { - let child_path = parent_path.push(child_key.as_str()); - f(&child_path, child_value, draft.detect(child_value))?; - } + (_, Some(i)) => { + let child_path = parent.push(i); + f(&child_path, v, d) } + _ => f(&parent, v, d), } - "items" => { - let parent_path = path.push("items"); - match value { - Value::Array(arr) => { - for (i, item) in arr.iter().enumerate() { - let child_path = parent_path.push(i); - f(&child_path, item, draft.detect(item))?; - } - } - _ => f(&parent_path, value, draft.detect(value))?, - } - } - "dependencies" => { - if let Some(obj) = value.as_object() { - let parent_path = path.push(key.as_str()); - for (child_key, child_value) in obj { - if !child_value.is_object() { - continue; - } - let child_path = parent_path.push(child_key.as_str()); - f(&child_path, child_value, draft.detect(child_value))?; - } - } - } - _ => {} - } + })?; } Ok(()) } From 1cf800db72642b451fd7dbc9e481d1ce4fa2fbc6 Mon Sep 17 00:00:00 2001 From: Dmitry Dygalo Date: Tue, 7 Apr 2026 22:19:02 +0200 Subject: [PATCH 10/14] wip Signed-off-by: Dmitry Dygalo --- .../src/registry/build.rs | 904 +++++++++++------- .../src/spec/draft201909.rs | 30 - 
.../src/spec/draft202012.rs | 19 - .../jsonschema-referencing/src/spec/draft4.rs | 29 - .../jsonschema-referencing/src/spec/draft6.rs | 22 - .../jsonschema-referencing/src/spec/draft7.rs | 27 - crates/jsonschema-referencing/src/spec/mod.rs | 1 - 7 files changed, 582 insertions(+), 450 deletions(-) diff --git a/crates/jsonschema-referencing/src/registry/build.rs b/crates/jsonschema-referencing/src/registry/build.rs index 286f1752..c2dc9f0d 100644 --- a/crates/jsonschema-referencing/src/registry/build.rs +++ b/crates/jsonschema-referencing/src/registry/build.rs @@ -12,7 +12,7 @@ use std::{borrow::Cow, collections::VecDeque, num::NonZeroUsize, sync::Arc}; use ahash::{AHashMap, AHashSet}; use fluent_uri::{pct_enc::EStr, Uri}; -use serde_json::Value; +use serde_json::{Map, Value}; use crate::{ cache::UriCache, @@ -172,7 +172,6 @@ pub(super) fn build_prepared_index_for_documents<'a>( )?; } else { let mut local_seen = VisitedRefs::new(); - let mut owned_child_scratch = Vec::new(); process_owned_document( Arc::clone(doc_uri), doc_uri, @@ -182,7 +181,6 @@ pub(super) fn build_prepared_index_for_documents<'a>( &mut state, &mut known_resources, resolution_cache, - &mut owned_child_scratch, &mut local_seen, )?; } @@ -224,51 +222,460 @@ unsafe fn reuse_visited_local_refs<'b>(mut s: VisitedRefs<'_>) -> VisitedRefs<'b std::mem::transmute(s) } +/// Reinterpret `&Value` as `&'long Value` for a value inside an `Arc>`. +/// +/// # Safety +/// - The `Arc>` is kept alive for the entire BFS pass +/// (it lives in `DocumentStore<'long>` whose borrow outlives all BFS calls). +/// - `Value` contains no interior mutability, so no aliasing rule is violated. 
+#[allow(unsafe_code)] +#[inline] +unsafe fn extend_value_lifetime<'long>(value: &Value) -> &'long Value { + &*std::ptr::from_ref::(value) +} + #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] enum ReferenceKind { Ref, Schema, } -struct ProcessingState<'a> { - queue: VecDeque, +/// Lifetime-free traversal state passed to external-resource collection helpers. +struct TraversalCtx { seen: ReferenceTracker, - // The String is the original reference text (e.g. "./foo.json"), kept solely for - // `json-schema://`-scheme error messages where the resolved URI is not user-friendly. external: AHashSet<(String, Uri, ReferenceKind)>, scratch: String, refers_metaschemas: bool, - custom_metaschemas: Vec, - /// Tracks schema pointers we've visited during recursive external resource collection. - /// This prevents infinite recursion when schemas reference each other. + /// Tracks schema pointer addresses we have visited during recursive collection. visited_schemas: AHashSet, - /// Deferred local-ref targets. During the main traversal, instead of calling - /// `collect_external_resources_recursive` immediately when a local `$ref` is found, - /// the target is pushed here. After `process_queue` completes (full document traversal), - /// subresource targets are already in `visited_schemas` and skipped in O(1) via the - /// pre-stored value address; non-subresource paths (e.g. `#/components/schemas/Foo`) - /// are still fully traversed. deferred_refs: Vec, +} + +impl TraversalCtx { + fn new() -> Self { + Self { + seen: ReferenceTracker::new(), + external: AHashSet::new(), + scratch: String::new(), + refers_metaschemas: false, + visited_schemas: AHashSet::new(), + deferred_refs: Vec::new(), + } + } +} + +/// Unified scan result for a JSON object during BFS traversal. +/// +/// `has_ref_or_schema` is the fast-path flag: set from `ObjectScan.has_ref_or_schema` +/// (borrowed path) or `ref_.is_some() || schema.is_some()` (owned path). 
+/// `ref_` and `schema` hold actual values only for the owned path (`OwnedStrategy`); +/// they are `None` for the borrowed path where `fill_children` extracts them. +#[derive(Copy, Clone)] +struct ScannedMetadata<'v> { + id: Option<&'v str>, + has_anchor: bool, + has_ref_or_schema: bool, + /// Actual `$ref` value — `Some` only when produced by `OwnedStrategy::scan`. + ref_: Option<&'v str>, + /// Actual `$schema` value — `Some` only when produced by `OwnedStrategy::scan`. + schema: Option<&'v str>, +} + +/// Controls how `explore_subtree` registers subresources and traverses children. +/// +/// Two concrete implementations: +/// - `BorrowedStrategy` — zero-sized type for externally-owned schema values. +/// - `OwnedStrategy` — carries the current JSON Pointer path and document reference. +/// +/// Both are `Copy` so the generic function can pass the strategy by value without cloning. +trait SubtreeStrategy<'v>: Copy { + /// The element type stored in the child scratch buffer. + type Child: Copy + 'v; + + /// Scan `object` to extract id, anchor, and ref metadata. + fn scan(draft: Draft, object: &'v Map) -> ScannedMetadata<'v>; + + /// Return `true` when `object` contains only `$ref` / `$schema` and no child schemas. + /// Borrowed always returns `false`; owned checks `object.len() == 1`. + #[inline] + fn is_ref_only(_object: &Map) -> bool { + false + } + + /// Register `subschema` in the index at `key`. + fn register( + self, + index: &mut Index<'v>, + key: &Arc>, + draft: Draft, + insert: bool, + subschema: &'v Value, + ); + + /// Decide whether to register a subresource that has an `$id`. + /// + /// `is_root` — true when processing the root entry point of the document. + /// `new_base` — the resolved base URI after applying `$id`. + /// `root_uri` — the document root URI. + /// `changed` — true when `new_base != old_base`. + /// `has_anchor` — true when the object also has an anchor keyword. 
+ fn should_register_with_id( + is_root: bool, + new_base: &Arc>, + root_uri: &Arc>, + changed: bool, + has_anchor: bool, + ) -> bool; + + /// Fill the child scratch with children of `object` and return the reference slots. + /// + /// For borrowed: calls `scan_borrowed_object_into_scratch_map` (fills ref/schema slots + /// and child list in one pass). + /// For owned: calls `scan_owned_object_into_scratch_map` (fills children; ref/schema + /// slots come from the pre-computed `scan` result). + fn fill_children( + draft: Draft, + object: &'v Map, + scan: ScannedMetadata<'v>, + state: &mut ProcessingState<'v>, + ) -> ReferenceSlots<'v>; + + /// Slice of children currently in the scratch buffer. + fn child_items<'s>(state: &'s ProcessingState<'v>) -> &'s [Self::Child]; + + /// Truncate the scratch buffer to `to` elements. + fn truncate_children(state: &mut ProcessingState<'v>, to: usize); + + /// Recurse into one child, extending the path/base as appropriate. + fn recurse( + self, + child: Self::Child, + base: Arc>, + document_root: &'v Value, + document_root_uri: &Arc>, + state: &mut ProcessingState<'v>, + known_resources: &mut KnownResources, + resolution_cache: &mut UriCache, + local_seen: &mut VisitedRefs<'v>, + ) -> Result<(), Error>; + + /// Walk subresources on the no-ref path. + fn walk( + self, + draft: Draft, + object: &'v Map, + base: Arc>, + document_root: &'v Value, + document_root_uri: &Arc>, + state: &mut ProcessingState<'v>, + known_resources: &mut KnownResources, + resolution_cache: &mut UriCache, + local_seen: &mut VisitedRefs<'v>, + ) -> Result<(), Error>; +} + +/// Strategy for schemas borrowed from the caller (zero-copy path). 
+#[derive(Copy, Clone)] +struct BorrowedStrategy; + +impl<'v> SubtreeStrategy<'v> for BorrowedStrategy { + type Child = (&'v Value, Draft); + + #[inline] + fn scan(draft: Draft, object: &'v Map) -> ScannedMetadata<'v> { + let s = draft.scan_object(object); + ScannedMetadata { + id: s.id, + has_anchor: s.has_anchor, + has_ref_or_schema: s.has_ref_or_schema, + ref_: None, + schema: None, + } + } + + // is_ref_only: uses the default (false) + + #[inline] + fn register( + self, + index: &mut Index<'v>, + key: &Arc>, + draft: Draft, + insert: bool, + subschema: &'v Value, + ) { + index.register_borrowed_subresource(key, draft, insert, subschema); + } + + #[inline] + fn should_register_with_id( + is_root: bool, + new_base: &Arc>, + root_uri: &Arc>, + _changed: bool, + _has_anchor: bool, + ) -> bool { + !(is_root && new_base == root_uri) + } + + #[inline] + fn fill_children( + draft: Draft, + object: &'v Map, + _scan: ScannedMetadata<'v>, + state: &mut ProcessingState<'v>, + ) -> ReferenceSlots<'v> { + draft.scan_borrowed_object_into_scratch_map( + object, + &mut state.reference_scratch, + &mut state.borrowed_child_scratch, + ); + ReferenceSlots { + ref_: state.reference_scratch.ref_, + schema: state.reference_scratch.schema, + } + } + + #[inline] + fn child_items<'s>(state: &'s ProcessingState<'v>) -> &'s [Self::Child] { + &state.borrowed_child_scratch + } + + #[inline] + fn truncate_children(state: &mut ProcessingState<'v>, to: usize) { + state.borrowed_child_scratch.truncate(to); + state.reference_scratch.ref_ = None; + state.reference_scratch.schema = None; + } + + fn recurse( + self, + (child, child_draft): (&'v Value, Draft), + base: Arc>, + document_root: &'v Value, + document_root_uri: &Arc>, + state: &mut ProcessingState<'v>, + known_resources: &mut KnownResources, + resolution_cache: &mut UriCache, + local_seen: &mut VisitedRefs<'v>, + ) -> Result<(), Error> { + explore_subtree( + base, + document_root, + child, + child_draft, + false, + document_root_uri, + 
state, + known_resources, + resolution_cache, + BorrowedStrategy, + local_seen, + ) + } + + fn walk( + self, + draft: Draft, + object: &'v Map, + base: Arc>, + document_root: &'v Value, + document_root_uri: &Arc>, + state: &mut ProcessingState<'v>, + known_resources: &mut KnownResources, + resolution_cache: &mut UriCache, + local_seen: &mut VisitedRefs<'v>, + ) -> Result<(), Error> { + draft.walk_borrowed_subresources_map(object, &mut |child, child_draft| { + explore_subtree( + Arc::clone(&base), + document_root, + child, + child_draft, + false, + document_root_uri, + state, + known_resources, + resolution_cache, + BorrowedStrategy, + local_seen, + ) + }) + } +} + +/// Strategy for owned documents (retrieved at runtime, stored behind `Arc`). +/// +/// Carries the document reference and the current JSON Pointer path so that +/// `register` can record exact pointer locations in the index. +#[derive(Copy, Clone)] +struct OwnedStrategy<'v, 'doc, 'key, 'node> { + document: &'doc Arc>, + path: &'node JsonPointerNode<'key, 'node>, +} + +impl<'v> SubtreeStrategy<'v> for OwnedStrategy<'v, '_, '_, '_> { + type Child = ChildNode<'v>; + + #[inline] + fn scan(draft: Draft, object: &'v Map) -> ScannedMetadata<'v> { + let info = draft.object_info(object); + ScannedMetadata { + id: info.id, + has_anchor: info.has_anchor, + has_ref_or_schema: info.ref_.is_some() || info.schema.is_some(), + ref_: info.ref_, + schema: info.schema, + } + } + + #[inline] + fn is_ref_only(object: &Map) -> bool { + object.len() == 1 + } + + #[inline] + fn register( + self, + index: &mut Index<'v>, + key: &Arc>, + draft: Draft, + insert: bool, + subschema: &'v Value, + ) { + let pointer = ParsedPointer::from_pointer_node(self.path); + index.register_owned_subresource(key, self.document, &pointer, draft, insert, subschema); + } + + #[inline] + fn should_register_with_id( + is_root: bool, + new_base: &Arc>, + root_uri: &Arc>, + changed: bool, + has_anchor: bool, + ) -> bool { + !(is_root && new_base == 
root_uri) && (changed || has_anchor) + } + + #[inline] + fn fill_children( + draft: Draft, + object: &'v Map, + scan: ScannedMetadata<'v>, + state: &mut ProcessingState<'v>, + ) -> ReferenceSlots<'v> { + let mut throwaway = ReferenceSlots::default(); + draft.scan_owned_object_into_scratch_map( + object, + &mut throwaway, + &mut state.owned_child_scratch, + ); + ReferenceSlots { + ref_: scan.ref_, + schema: scan.schema, + } + } + + #[inline] + fn child_items<'s>(state: &'s ProcessingState<'v>) -> &'s [Self::Child] { + &state.owned_child_scratch + } + + #[inline] + fn truncate_children(state: &mut ProcessingState<'v>, to: usize) { + state.owned_child_scratch.truncate(to); + } + + fn recurse( + self, + child: ChildNode<'v>, + base: Arc>, + document_root: &'v Value, + document_root_uri: &Arc>, + state: &mut ProcessingState<'v>, + known_resources: &mut KnownResources, + resolution_cache: &mut UriCache, + local_seen: &mut VisitedRefs<'v>, + ) -> Result<(), Error> { + with_owned_child_path(self.path, &child, |child_path| { + explore_subtree( + base, + document_root, + child.value, + child.draft, + false, + document_root_uri, + state, + known_resources, + resolution_cache, + OwnedStrategy { + document: self.document, + path: child_path, + }, + local_seen, + ) + }) + } + + fn walk( + self, + draft: Draft, + object: &'v Map, + base: Arc>, + document_root: &'v Value, + document_root_uri: &Arc>, + state: &mut ProcessingState<'v>, + known_resources: &mut KnownResources, + resolution_cache: &mut UriCache, + local_seen: &mut VisitedRefs<'v>, + ) -> Result<(), Error> { + draft.walk_owned_subresources_map( + object, + self.path, + &mut |child_path, child, child_draft| { + explore_subtree( + Arc::clone(&base), + document_root, + child, + child_draft, + false, + document_root_uri, + state, + known_resources, + resolution_cache, + OwnedStrategy { + document: self.document, + path: child_path, + }, + local_seen, + ) + }, + ) + } +} + +struct ProcessingState<'a> { + queue: VecDeque, + 
custom_metaschemas: Vec, /// Reused scratch for `$ref`/`$schema` slot capture during both borrowed and owned traversal. reference_scratch: ReferenceSlots<'a>, borrowed_child_scratch: Vec<(&'a Value, Draft)>, + owned_child_scratch: Vec>, index: Index<'a>, + ctx: TraversalCtx, } impl ProcessingState<'_> { fn new() -> Self { Self { queue: VecDeque::with_capacity(32), - seen: ReferenceTracker::new(), - external: AHashSet::new(), - scratch: String::new(), - refers_metaschemas: false, custom_metaschemas: Vec::new(), - visited_schemas: AHashSet::new(), - deferred_refs: Vec::new(), reference_scratch: ReferenceSlots::default(), borrowed_child_scratch: Vec::new(), + owned_child_scratch: Vec::new(), index: Index::default(), + ctx: TraversalCtx::new(), } } } @@ -357,7 +764,6 @@ fn process_queue<'r>( continue; } let mut visited = VisitedRefs::new(); - let mut owned_child_scratch = Vec::new(); process_owned_document( base, &root_uri, @@ -367,7 +773,6 @@ fn process_queue<'r>( state, known_resources, resolution_cache, - &mut owned_child_scratch, &mut visited, )?; } @@ -407,78 +812,89 @@ fn process_borrowed_document<'r>( ) } -fn explore_borrowed_subtree<'r>( - mut current_base_uri: Arc>, - document_root: &'r Value, - subschema: &'r Value, +fn explore_subtree<'v, S: SubtreeStrategy<'v>>( + mut base: Arc>, + document_root: &'v Value, + subschema: &'v Value, draft: Draft, is_root_entry: bool, document_root_uri: &Arc>, - state: &mut ProcessingState<'r>, + state: &mut ProcessingState<'v>, known_resources: &mut KnownResources, resolution_cache: &mut UriCache, - local_seen: &mut VisitedRefs<'r>, + strategy: S, + local_seen: &mut VisitedRefs<'v>, ) -> Result<(), Error> { let Some(object) = subschema.as_object() else { return Ok(()); }; - let scan = draft.scan_object(object); + let scan = S::scan(draft, object); if let Some(id) = scan.id { - let (new_base, insert_resource) = - resolve_subresource_id(¤t_base_uri, id, known_resources, resolution_cache)?; - current_base_uri = new_base; - if 
!(is_root_entry && current_base_uri == *document_root_uri) { - state.index.register_borrowed_subresource( - ¤t_base_uri, - draft, - insert_resource, - subschema, - ); + let (new_base, changed) = + resolve_subresource_id(&base, id, known_resources, resolution_cache)?; + base = new_base; + if S::should_register_with_id( + is_root_entry, + &base, + document_root_uri, + changed, + scan.has_anchor, + ) { + strategy.register(&mut state.index, &base, draft, changed, subschema); } } else if scan.has_anchor && !is_root_entry { - state - .index - .register_borrowed_subresource(¤t_base_uri, draft, false, subschema); + strategy.register(&mut state.index, &base, draft, false, subschema); } if scan.has_ref_or_schema { - let child_start = state.borrowed_child_scratch.len(); - draft.scan_borrowed_object_into_scratch_map( - object, - &mut state.reference_scratch, - &mut state.borrowed_child_scratch, - ); - let child_end = state.borrowed_child_scratch.len(); + if S::is_ref_only(object) { + let subschema_ptr = std::ptr::from_ref::(subschema) as usize; + if state.ctx.visited_schemas.insert(subschema_ptr) { + let slots = ReferenceSlots { + ref_: scan.ref_, + schema: scan.schema, + }; + collect_external_resources_from_slots( + &base, + document_root, + &slots, + &mut state.ctx, + resolution_cache, + draft, + document_root_uri, + local_seen, + )?; + } + return Ok(()); + } + + let child_start = S::child_items(state).len(); + let slots = S::fill_children(draft, object, scan, state); + let child_end = S::child_items(state).len(); let subschema_ptr = std::ptr::from_ref::(subschema) as usize; - if state.visited_schemas.insert(subschema_ptr) { + if state.ctx.visited_schemas.insert(subschema_ptr) { collect_external_resources_from_slots( - ¤t_base_uri, + &base, document_root, - &state.reference_scratch, - &mut state.external, - &mut state.seen, + &slots, + &mut state.ctx, resolution_cache, - &mut state.scratch, - &mut state.refers_metaschemas, draft, document_root_uri, - &mut 
state.deferred_refs, local_seen, )?; } let mut idx = child_start; while idx < child_end { - let (child, child_draft) = state.borrowed_child_scratch[idx]; + let child = S::child_items(state)[idx]; idx += 1; - explore_borrowed_subtree( - Arc::clone(¤t_base_uri), - document_root, + strategy.recurse( child, - child_draft, - false, + Arc::clone(&base), + document_root, document_root_uri, state, known_resources, @@ -486,64 +902,81 @@ fn explore_borrowed_subtree<'r>( local_seen, )?; } - - state.reference_scratch.ref_ = None; - state.reference_scratch.schema = None; - state.borrowed_child_scratch.truncate(child_start); + S::truncate_children(state, child_start); return Ok(()); } let subschema_ptr = std::ptr::from_ref::(subschema) as usize; - if state.visited_schemas.insert(subschema_ptr) { + if state.ctx.visited_schemas.insert(subschema_ptr) { collect_external_resources( - ¤t_base_uri, + &base, document_root, subschema, - &mut state.external, - &mut state.seen, + &mut state.ctx, resolution_cache, - &mut state.scratch, - &mut state.refers_metaschemas, draft, document_root_uri, - &mut state.deferred_refs, local_seen, )?; } + strategy.walk( + draft, + object, + base, + document_root, + document_root_uri, + state, + known_resources, + resolution_cache, + local_seen, + ) +} - draft.walk_borrowed_subresources_map(object, &mut |child, child_draft| { - explore_borrowed_subtree( - Arc::clone(¤t_base_uri), - document_root, - child, - child_draft, - false, - document_root_uri, - state, - known_resources, - resolution_cache, - local_seen, - ) - }) +fn explore_borrowed_subtree<'r>( + current_base_uri: Arc>, + document_root: &'r Value, + subschema: &'r Value, + draft: Draft, + is_root_entry: bool, + document_root_uri: &Arc>, + state: &mut ProcessingState<'r>, + known_resources: &mut KnownResources, + resolution_cache: &mut UriCache, + local_seen: &mut VisitedRefs<'r>, +) -> Result<(), Error> { + explore_subtree( + current_base_uri, + document_root, + subschema, + draft, + is_root_entry, 
+ document_root_uri, + state, + known_resources, + resolution_cache, + BorrowedStrategy, + local_seen, + ) } -fn process_owned_document<'a, 'r>( +#[allow(unsafe_code)] +fn process_owned_document<'r>( current_base_uri: Arc>, document_root_uri: &Arc>, - document: &'a Arc>, + document: &Arc>, pointer_path: &str, draft: Draft, state: &mut ProcessingState<'r>, known_resources: &mut KnownResources, resolution_cache: &mut UriCache, - owned_child_scratch: &mut Vec>, - local_seen: &mut VisitedRefs<'a>, + local_seen: &mut VisitedRefs<'r>, ) -> Result<(), Error> { - let document_root = document.contents(); + // SAFETY: document lives in DocumentStore<'r> for the full BFS duration. + let document_root: &'r Value = unsafe { extend_value_lifetime(document.contents()) }; let Some(subschema) = (if pointer_path.is_empty() { Some(document_root) } else { - pointer(document_root, pointer_path) + pointer(document_root, pointer_path).map(|v| unsafe { extend_value_lifetime(v) }) }) else { return Ok(()); }; @@ -562,7 +995,6 @@ fn process_owned_document<'a, 'r>( state, known_resources, resolution_cache, - owned_child_scratch, local_seen, ) }) @@ -595,10 +1027,11 @@ fn with_pointer_node_from_parsed( } } -fn explore_owned_subtree<'a, 'r>( - mut current_base_uri: Arc>, - document_root: &'a Value, - subschema: &'a Value, +#[allow(unsafe_code)] +fn explore_owned_subtree<'r>( + current_base_uri: Arc>, + document_root: &'r Value, + subschema: &'r Value, draft: Draft, is_root_entry: bool, path: &JsonPointerNode<'_, '_>, @@ -607,165 +1040,21 @@ fn explore_owned_subtree<'a, 'r>( state: &mut ProcessingState<'r>, known_resources: &mut KnownResources, resolution_cache: &mut UriCache, - owned_child_scratch: &mut Vec>, - local_seen: &mut VisitedRefs<'a>, + local_seen: &mut VisitedRefs<'r>, ) -> Result<(), Error> { - let Some(object) = subschema.as_object() else { - return Ok(()); - }; - let child_start = owned_child_scratch.len(); - let info = draft.object_info(object); - let (id, has_anchors) = 
(info.id, info.has_anchor); - if let Some(id) = id { - let (new_base, insert_resource) = - resolve_subresource_id(¤t_base_uri, id, known_resources, resolution_cache)?; - current_base_uri = new_base; - if !(is_root_entry && current_base_uri == *document_root_uri) - && (insert_resource || has_anchors) - { - let pointer = ParsedPointer::from_pointer_node(path); - state.index.register_owned_subresource( - ¤t_base_uri, - document, - &pointer, - draft, - insert_resource, - subschema, - ); - } - } else if has_anchors && !is_root_entry { - let pointer = ParsedPointer::from_pointer_node(path); - state.index.register_owned_subresource( - ¤t_base_uri, - document, - &pointer, - draft, - false, - subschema, - ); - } - - if info.ref_.is_some() || info.schema.is_some() { - // Build stack-local slots from already-computed info; avoids a mutable scratch - // parameter while keeping zero allocations. - let slots = ReferenceSlots { - ref_: info.ref_, - schema: info.schema, - }; - - if object.len() == 1 { - let subschema_ptr = std::ptr::from_ref::(subschema) as usize; - if state.visited_schemas.insert(subschema_ptr) { - collect_external_resources_from_slots( - ¤t_base_uri, - document_root, - &slots, - &mut state.external, - &mut state.seen, - resolution_cache, - &mut state.scratch, - &mut state.refers_metaschemas, - draft, - document_root_uri, - &mut state.deferred_refs, - local_seen, - )?; - } - return Ok(()); - } - - if info.has_children { - // Use a temporary slot just to satisfy the API; children are written into - // owned_child_scratch which is what we actually need. 
- let mut scan_slots = ReferenceSlots::default(); - let (_, _) = draft.scan_owned_object_into_scratch_map( - object, - &mut scan_slots, - owned_child_scratch, - ); - let subschema_ptr = std::ptr::from_ref::(subschema) as usize; - if state.visited_schemas.insert(subschema_ptr) { - collect_external_resources_from_slots( - ¤t_base_uri, - document_root, - &slots, - &mut state.external, - &mut state.seen, - resolution_cache, - &mut state.scratch, - &mut state.refers_metaschemas, - draft, - document_root_uri, - &mut state.deferred_refs, - local_seen, - )?; - } - - let child_end = owned_child_scratch.len(); - let mut idx = child_start; - while idx < child_end { - let child = owned_child_scratch[idx]; - idx += 1; - with_owned_child_path(path, &child, |child_path| { - explore_owned_subtree( - Arc::clone(¤t_base_uri), - document_root, - child.value, - child.draft, - false, - child_path, - document_root_uri, - document, - state, - known_resources, - resolution_cache, - owned_child_scratch, - local_seen, - ) - })?; - } - - owned_child_scratch.truncate(child_start); - return Ok(()); - } - - let subschema_ptr = std::ptr::from_ref::(subschema) as usize; - if state.visited_schemas.insert(subschema_ptr) { - collect_external_resources_from_slots( - ¤t_base_uri, - document_root, - &slots, - &mut state.external, - &mut state.seen, - resolution_cache, - &mut state.scratch, - &mut state.refers_metaschemas, - draft, - document_root_uri, - &mut state.deferred_refs, - local_seen, - )?; - } - return Ok(()); - } - - draft.walk_owned_subresources_map(object, path, &mut |child_path, child, child_draft| { - explore_owned_subtree( - Arc::clone(¤t_base_uri), - document_root, - child, - child_draft, - false, - child_path, - document_root_uri, - document, - state, - known_resources, - resolution_cache, - owned_child_scratch, - local_seen, - ) - }) + explore_subtree( + current_base_uri, + document_root, + subschema, + draft, + is_root_entry, + document_root_uri, + state, + known_resources, + 
resolution_cache, + OwnedStrategy { document, path }, + local_seen, + ) } fn enqueue_fragment_entry( @@ -896,7 +1185,7 @@ fn fetch_external_resources<'a>( default_draft: Draft, retriever: &dyn Retrieve, ) -> Result<(), Error> { - for (original, uri, kind) in state.external.drain() { + for (original, uri, kind) in state.ctx.external.drain() { let mut fragmentless = uri.clone(); fragmentless.set_fragment(None); if !known_resources.contains(&fragmentless) { @@ -937,12 +1226,12 @@ async fn fetch_external_resources_async<'a>( ) -> Result<(), Error> { type ExternalRefsByBase = AHashMap, Vec<(String, Uri, ReferenceKind)>>; - if state.external.is_empty() { + if state.ctx.external.is_empty() { return Ok(()); } let mut grouped = ExternalRefsByBase::new(); - for (original, uri, kind) in state.external.drain() { + for (original, uri, kind) in state.ctx.external.drain() { let mut fragmentless = uri.clone(); fragmentless.set_fragment(None); if !known_resources.contains(&fragmentless) { @@ -997,7 +1286,7 @@ fn finalize_index<'a>( visited: VisitedRefs<'static>, ) -> Result<(), Error> { handle_metaschemas( - state.refers_metaschemas, + state.ctx.refers_metaschemas, documents, known_resources, default_draft, @@ -1028,7 +1317,7 @@ fn resolve_and_index<'a>( ) -> Result<(), Error> { let mut visited: VisitedRefs<'static> = VisitedRefs::new(); - while !(state.queue.is_empty() && state.external.is_empty()) { + while !(state.queue.is_empty() && state.ctx.external.is_empty()) { drain_queue_and_deferred( state, documents, @@ -1063,7 +1352,7 @@ async fn resolve_and_index_async<'a>( ) -> Result<(), Error> { let mut visited: VisitedRefs<'static> = VisitedRefs::new(); - while !(state.queue.is_empty() && state.external.is_empty()) { + while !(state.queue.is_empty() && state.ctx.external.is_empty()) { drain_queue_and_deferred( state, documents, @@ -1135,14 +1424,10 @@ fn collect_external_resources_from_slots<'doc>( base: &Arc>, root: &'doc Value, references: &ReferenceSlots<'doc>, - collected: 
&mut AHashSet<(String, Uri, ReferenceKind)>, - seen: &mut ReferenceTracker, + ctx: &mut TraversalCtx, resolution_cache: &mut UriCache, - scratch: &mut String, - refers_metaschemas: &mut bool, draft: Draft, doc_key: &Arc>, - deferred_refs: &mut Vec, visited: &mut VisitedRefs<'doc>, ) -> Result<(), Error> { for (reference, key) in [(references.ref_, "$ref"), (references.schema, "$schema")] { @@ -1154,7 +1439,7 @@ fn collect_external_resources_from_slots<'doc>( || base.as_str().starts_with("https://json-schema.org/draft/") { if key == "$ref" { - *refers_metaschemas = true; + ctx.refers_metaschemas = true; } continue; } @@ -1167,7 +1452,7 @@ fn collect_external_resources_from_slots<'doc>( if let Some(referenced) = pointer(root, ptr) { let target_draft = draft.detect(referenced); let value_addr = std::ptr::from_ref::(referenced) as usize; - deferred_refs.push(( + ctx.deferred_refs.push(( Arc::clone(base), Arc::clone(doc_key), ptr.to_string(), @@ -1178,7 +1463,7 @@ fn collect_external_resources_from_slots<'doc>( } continue; } - if mark_reference(seen, base, reference) { + if mark_reference(&mut ctx.seen, base, reference) { let resolved = if base.has_fragment() { let mut base_without_fragment = base.as_ref().clone(); base_without_fragment.set_fragment(None); @@ -1195,10 +1480,10 @@ fn collect_external_resources_from_slots<'doc>( if let Some(encoded) = uri::EncodedString::new(fragment) { resolved = resolved.with_fragment(Some(encoded)); } else { - uri::encode_to(fragment, scratch); - resolved = - resolved.with_fragment(Some(uri::EncodedString::new_or_panic(scratch))); - scratch.clear(); + uri::encode_to(fragment, &mut ctx.scratch); + resolved = resolved + .with_fragment(Some(uri::EncodedString::new_or_panic(&ctx.scratch))); + ctx.scratch.clear(); } } resolved @@ -1211,7 +1496,7 @@ fn collect_external_resources_from_slots<'doc>( } else { ReferenceKind::Ref }; - collected.insert((reference.to_string(), resolved, kind)); + ctx.external.insert((reference.to_string(), resolved, 
kind)); } } Ok(()) @@ -1241,14 +1526,10 @@ fn collect_external_resources<'doc>( base: &Arc>, root: &'doc Value, contents: &'doc Value, - collected: &mut AHashSet<(String, Uri, ReferenceKind)>, - seen: &mut ReferenceTracker, + ctx: &mut TraversalCtx, resolution_cache: &mut UriCache, - scratch: &mut String, - refers_metaschemas: &mut bool, draft: Draft, doc_key: &Arc>, - deferred_refs: &mut Vec, visited: &mut VisitedRefs<'doc>, ) -> Result<(), Error> { if base.scheme().as_str() == "urn" { @@ -1262,7 +1543,7 @@ fn collect_external_resources<'doc>( || base.as_str().starts_with("https://json-schema.org/draft/") { if $key == "$ref" { - *refers_metaschemas = true; + ctx.refers_metaschemas = true; } } else if $reference != "#" { if $reference.starts_with('#') { @@ -1271,7 +1552,7 @@ fn collect_external_resources<'doc>( if let Some(referenced) = pointer(root, ptr) { let target_draft = draft.detect(referenced); let value_addr = std::ptr::from_ref::(referenced) as usize; - deferred_refs.push(( + ctx.deferred_refs.push(( Arc::clone(base), Arc::clone(doc_key), ptr.to_string(), @@ -1280,7 +1561,7 @@ fn collect_external_resources<'doc>( )); } } - } else if mark_reference(seen, base, $reference) { + } else if mark_reference(&mut ctx.seen, base, $reference) { let resolved = if base.has_fragment() { let mut base_without_fragment = base.as_ref().clone(); base_without_fragment.set_fragment(None); @@ -1297,10 +1578,11 @@ fn collect_external_resources<'doc>( if let Some(encoded) = uri::EncodedString::new(fragment) { resolved = resolved.with_fragment(Some(encoded)); } else { - uri::encode_to(fragment, scratch); - resolved = resolved - .with_fragment(Some(uri::EncodedString::new_or_panic(scratch))); - scratch.clear(); + uri::encode_to(fragment, &mut ctx.scratch); + resolved = resolved.with_fragment(Some( + uri::EncodedString::new_or_panic(&ctx.scratch), + )); + ctx.scratch.clear(); } } resolved @@ -1313,7 +1595,8 @@ fn collect_external_resources<'doc>( } else { ReferenceKind::Ref }; - 
collected.insert(($reference.to_string(), resolved, kind)); + ctx.external + .insert(($reference.to_string(), resolved, kind)); } } }; @@ -1348,19 +1631,14 @@ fn collect_external_resources_recursive<'doc>( base: &Arc>, root: &'doc Value, contents: &'doc Value, - collected: &mut AHashSet<(String, Uri, ReferenceKind)>, - seen: &mut ReferenceTracker, + ctx: &mut TraversalCtx, resolution_cache: &mut UriCache, - scratch: &mut String, - refers_metaschemas: &mut bool, draft: Draft, - visited: &mut AHashSet, doc_key: &Arc>, - deferred_refs: &mut Vec, visited_refs: &mut VisitedRefs<'doc>, ) -> Result<(), Error> { let ptr = std::ptr::from_ref::(contents) as usize; - if !visited.insert(ptr) { + if !ctx.visited_schemas.insert(ptr) { return Ok(()); } @@ -1373,14 +1651,10 @@ fn collect_external_resources_recursive<'doc>( ¤t_base, root, contents, - collected, - seen, + ctx, resolution_cache, - scratch, - refers_metaschemas, draft, doc_key, - deferred_refs, visited_refs, )?; @@ -1390,15 +1664,10 @@ fn collect_external_resources_recursive<'doc>( ¤t_base, root, subresource, - collected, - seen, + ctx, resolution_cache, - scratch, - refers_metaschemas, subresource_draft, - visited, doc_key, - deferred_refs, visited_refs, )?; } @@ -1419,14 +1688,14 @@ fn process_deferred_refs<'a>( resolution_cache: &mut UriCache, local_seen: &mut VisitedRefs<'a>, ) -> Result<(), Error> { - while !state.deferred_refs.is_empty() { - let batch = std::mem::take(&mut state.deferred_refs); + while !state.ctx.deferred_refs.is_empty() { + let batch = std::mem::take(&mut state.ctx.deferred_refs); for (base, doc_key, pointer_path, draft, value_addr) in batch { // Fast path: if this target was already visited by the main BFS traversal // (e.g. a `#/definitions/Foo` that `walk_subresources_with_path` descended into), // all its subresources were processed and `collect_external_resources` was already // called on each — skip without a redundant `pointer()` traversal. 
- if state.visited_schemas.contains(&value_addr) { + if state.ctx.visited_schemas.contains(&value_addr) { continue; } let Some(document) = documents.get(&doc_key) else { @@ -1444,15 +1713,10 @@ fn process_deferred_refs<'a>( &base, root, contents, - &mut state.external, - &mut state.seen, + &mut state.ctx, resolution_cache, - &mut state.scratch, - &mut state.refers_metaschemas, draft, - &mut state.visited_schemas, &doc_key, - &mut state.deferred_refs, local_seen, )?; } @@ -1714,7 +1978,6 @@ mod tests { let mut known_resources = KnownResources::default(); let mut resolution_cache = UriCache::new(); let mut local_seen = VisitedRefs::new(); - let mut owned_child_scratch = Vec::new(); known_resources.insert((*doc_key).clone()); state.index.register_document(&doc_key, &document); @@ -1728,7 +1991,6 @@ mod tests { &mut state, &mut known_resources, &mut resolution_cache, - &mut owned_child_scratch, &mut local_seen, ) .expect("owned document traversal should succeed"); @@ -1757,7 +2019,6 @@ mod tests { let mut known_resources = KnownResources::default(); let mut resolution_cache = UriCache::new(); let mut local_seen = VisitedRefs::new(); - let mut owned_child_scratch = Vec::new(); known_resources.insert((*doc_key).clone()); state.index.register_document(&doc_key, &document); @@ -1771,7 +2032,6 @@ mod tests { &mut state, &mut known_resources, &mut resolution_cache, - &mut owned_child_scratch, &mut local_seen, ) .expect("owned fragment traversal should succeed"); diff --git a/crates/jsonschema-referencing/src/spec/draft201909.rs b/crates/jsonschema-referencing/src/spec/draft201909.rs index 00783297..838ef915 100644 --- a/crates/jsonschema-referencing/src/spec/draft201909.rs +++ b/crates/jsonschema-referencing/src/spec/draft201909.rs @@ -13,7 +13,6 @@ pub(crate) fn object_info(schema: &Map) -> ObjectInfo<'_> { let mut has_anchor = false; let mut ref_ = None; let mut schema_ref = None; - let mut has_children = false; for (key, value) in schema { match key.as_str() { @@ -21,34 
+20,6 @@ pub(crate) fn object_info(schema: &Map) -> ObjectInfo<'_> { "$anchor" => has_anchor |= value.as_str().is_some(), "$ref" => ref_ = value.as_str(), "$schema" => schema_ref = value.as_str(), - "additionalItems" - | "additionalProperties" - | "contains" - | "contentSchema" - | "else" - | "if" - | "not" - | "propertyNames" - | "then" - | "unevaluatedItems" - | "unevaluatedProperties" => has_children = true, - "allOf" | "anyOf" | "oneOf" => { - has_children |= value.as_array().is_some_and(|items| !items.is_empty()); - } - "$defs" | "definitions" | "dependentSchemas" | "patternProperties" | "properties" => { - has_children |= value.as_object().is_some_and(|items| !items.is_empty()); - } - "items" => { - has_children |= match value { - Value::Array(items) => !items.is_empty(), - _ => true, - }; - } - "dependencies" => { - has_children |= value - .as_object() - .is_some_and(|items| items.values().any(Value::is_object)); - } _ => {} } } @@ -58,7 +29,6 @@ pub(crate) fn object_info(schema: &Map) -> ObjectInfo<'_> { has_anchor, ref_, schema: schema_ref, - has_children, } } diff --git a/crates/jsonschema-referencing/src/spec/draft202012.rs b/crates/jsonschema-referencing/src/spec/draft202012.rs index 894fa04e..3a3470bc 100644 --- a/crates/jsonschema-referencing/src/spec/draft202012.rs +++ b/crates/jsonschema-referencing/src/spec/draft202012.rs @@ -88,7 +88,6 @@ pub(crate) fn object_info(schema: &Map) -> ObjectInfo<'_> { let mut has_anchor = false; let mut ref_ = None; let mut schema_ref = None; - let mut has_children = false; for (key, value) in schema { match key.as_str() { @@ -96,23 +95,6 @@ pub(crate) fn object_info(schema: &Map) -> ObjectInfo<'_> { "$anchor" | "$dynamicAnchor" => has_anchor |= value.as_str().is_some(), "$ref" => ref_ = value.as_str(), "$schema" => schema_ref = value.as_str(), - "additionalProperties" - | "contains" - | "contentSchema" - | "else" - | "if" - | "items" - | "not" - | "propertyNames" - | "then" - | "unevaluatedItems" - | 
"unevaluatedProperties" => has_children = true, - "allOf" | "anyOf" | "oneOf" | "prefixItems" => { - has_children |= value.as_array().is_some_and(|items| !items.is_empty()); - } - "$defs" | "definitions" | "dependentSchemas" | "patternProperties" | "properties" => { - has_children |= value.as_object().is_some_and(|items| !items.is_empty()); - } _ => {} } } @@ -122,7 +104,6 @@ pub(crate) fn object_info(schema: &Map) -> ObjectInfo<'_> { has_anchor, ref_, schema: schema_ref, - has_children, } } diff --git a/crates/jsonschema-referencing/src/spec/draft4.rs b/crates/jsonschema-referencing/src/spec/draft4.rs index ac5e7966..2c5bc44a 100644 --- a/crates/jsonschema-referencing/src/spec/draft4.rs +++ b/crates/jsonschema-referencing/src/spec/draft4.rs @@ -12,40 +12,12 @@ pub(crate) fn object_info(schema: &Map) -> ObjectInfo<'_> { let mut raw_id = None; let mut ref_ = None; let mut schema_ref = None; - let mut has_children = false; for (key, value) in schema { match key.as_str() { "id" => raw_id = value.as_str(), "$ref" => ref_ = value.as_str(), "$schema" => schema_ref = value.as_str(), - "additionalItems" | "additionalProperties" => has_children |= value.is_object(), - "contains" - | "contentSchema" - | "else" - | "if" - | "not" - | "propertyNames" - | "then" - | "unevaluatedItems" - | "unevaluatedProperties" => has_children = true, - "allOf" | "anyOf" | "oneOf" | "prefixItems" => { - has_children |= value.as_array().is_some_and(|items| !items.is_empty()); - } - "$defs" | "definitions" | "dependentSchemas" | "patternProperties" | "properties" => { - has_children |= value.as_object().is_some_and(|items| !items.is_empty()); - } - "items" => { - has_children |= match value { - Value::Array(items) => !items.is_empty(), - _ => true, - }; - } - "dependencies" => { - has_children |= value - .as_object() - .is_some_and(|items| items.values().any(Value::is_object)); - } _ => {} } } @@ -61,7 +33,6 @@ pub(crate) fn object_info(schema: &Map) -> ObjectInfo<'_> { has_anchor, ref_, schema: 
schema_ref, - has_children, } } diff --git a/crates/jsonschema-referencing/src/spec/draft6.rs b/crates/jsonschema-referencing/src/spec/draft6.rs index e0347bb1..27d89bab 100644 --- a/crates/jsonschema-referencing/src/spec/draft6.rs +++ b/crates/jsonschema-referencing/src/spec/draft6.rs @@ -12,33 +12,12 @@ pub(crate) fn object_info(schema: &Map) -> ObjectInfo<'_> { let mut raw_id = None; let mut ref_ = None; let mut schema_ref = None; - let mut has_children = false; for (key, value) in schema { match key.as_str() { "$id" => raw_id = value.as_str(), "$ref" => ref_ = value.as_str(), "$schema" => schema_ref = value.as_str(), - "additionalItems" | "additionalProperties" | "contains" | "not" | "propertyNames" => { - has_children = true; - } - "allOf" | "anyOf" | "oneOf" => { - has_children |= value.as_array().is_some_and(|items| !items.is_empty()); - } - "definitions" | "patternProperties" | "properties" => { - has_children |= value.as_object().is_some_and(|items| !items.is_empty()); - } - "items" => { - has_children |= match value { - Value::Array(items) => !items.is_empty(), - _ => true, - }; - } - "dependencies" => { - has_children |= value - .as_object() - .is_some_and(|items| items.values().any(Value::is_object)); - } _ => {} } } @@ -54,7 +33,6 @@ pub(crate) fn object_info(schema: &Map) -> ObjectInfo<'_> { has_anchor, ref_, schema: schema_ref, - has_children, } } diff --git a/crates/jsonschema-referencing/src/spec/draft7.rs b/crates/jsonschema-referencing/src/spec/draft7.rs index 15b3b0c6..36b61f37 100644 --- a/crates/jsonschema-referencing/src/spec/draft7.rs +++ b/crates/jsonschema-referencing/src/spec/draft7.rs @@ -12,38 +12,12 @@ pub(crate) fn object_info(schema: &Map) -> ObjectInfo<'_> { let mut raw_id = None; let mut ref_ = None; let mut schema_ref = None; - let mut has_children = false; for (key, value) in schema { match key.as_str() { "$id" => raw_id = value.as_str(), "$ref" => ref_ = value.as_str(), "$schema" => schema_ref = value.as_str(), - 
"additionalItems" - | "additionalProperties" - | "contains" - | "else" - | "if" - | "not" - | "propertyNames" - | "then" => has_children = true, - "allOf" | "anyOf" | "oneOf" => { - has_children |= value.as_array().is_some_and(|items| !items.is_empty()); - } - "definitions" | "patternProperties" | "properties" => { - has_children |= value.as_object().is_some_and(|items| !items.is_empty()); - } - "items" => { - has_children |= match value { - Value::Array(items) => !items.is_empty(), - _ => true, - }; - } - "dependencies" => { - has_children |= value - .as_object() - .is_some_and(|items| items.values().any(Value::is_object)); - } _ => {} } } @@ -59,7 +33,6 @@ pub(crate) fn object_info(schema: &Map) -> ObjectInfo<'_> { has_anchor, ref_, schema: schema_ref, - has_children, } } diff --git a/crates/jsonschema-referencing/src/spec/mod.rs b/crates/jsonschema-referencing/src/spec/mod.rs index 1b2ce37b..6b2a67c3 100644 --- a/crates/jsonschema-referencing/src/spec/mod.rs +++ b/crates/jsonschema-referencing/src/spec/mod.rs @@ -39,7 +39,6 @@ pub(crate) struct ObjectInfo<'a> { pub(crate) has_anchor: bool, pub(crate) ref_: Option<&'a str>, pub(crate) schema: Option<&'a str>, - pub(crate) has_children: bool, } /// A child node queued for the BFS traversal of a schema document. 
From 27030494c0ee5771b77e64441ffb469262ff5306 Mon Sep 17 00:00:00 2001 From: Dmitry Dygalo Date: Tue, 7 Apr 2026 22:51:41 +0200 Subject: [PATCH 11/14] wip Signed-off-by: Dmitry Dygalo --- crates/jsonschema-referencing/src/draft.rs | 275 +-------------- .../src/registry/build.rs | 315 +++--------------- .../src/spec/draft201909.rs | 173 ++++------ .../src/spec/draft202012.rs | 191 ++--------- .../jsonschema-referencing/src/spec/draft4.rs | 195 +++-------- .../jsonschema-referencing/src/spec/draft6.rs | 167 +++------- .../jsonschema-referencing/src/spec/draft7.rs | 167 +++------- crates/jsonschema-referencing/src/spec/mod.rs | 72 ++-- 8 files changed, 341 insertions(+), 1214 deletions(-) diff --git a/crates/jsonschema-referencing/src/draft.rs b/crates/jsonschema-referencing/src/draft.rs index 40f91884..6693124b 100644 --- a/crates/jsonschema-referencing/src/draft.rs +++ b/crates/jsonschema-referencing/src/draft.rs @@ -2,12 +2,9 @@ use serde_json::{Map, Value}; use crate::{ anchor, - spec::{ - self, draft201909, draft202012, draft4, draft6, draft7, has_ref_or_schema, ChildNode, - ObjectInfo, ObjectScan, ReferenceSlots, - }, + spec::{self, draft201909, draft202012, draft4, draft6, draft7, ChildBuffer, ObjectAnalysis}, vocabularies::{VocabularySet, DRAFT_2019_09_VOCABULARIES, DRAFT_2020_12_VOCABULARIES}, - Anchor, Error, JsonPointerNode, Resolver, Resource, ResourceRef, Segments, + Anchor, Error, Resolver, Resource, ResourceRef, Segments, }; /// JSON Schema specification versions. 
@@ -95,15 +92,6 @@ impl Draft { } } - #[inline] - pub(crate) fn scan_object(self, obj: &Map) -> ObjectScan<'_> { - match self { - Draft::Draft4 => analyze_legacy_id_object(obj), - Draft::Draft6 | Draft::Draft7 => analyze_legacy_dollar_id_object(obj), - Draft::Draft201909 => analyze_id_and_anchor_object(obj), - Draft::Draft202012 | Draft::Unknown => analyze_id_and_any_anchor_object(obj), - } - } pub fn subresources_of(self, contents: &Value) -> impl Iterator { match contents.as_object() { Some(schema) => { @@ -119,99 +107,21 @@ impl Draft { None => draft202012::SubresourceIterator::Empty, } } - pub(crate) fn scan_borrowed_object_into_scratch_map<'a>( - self, - contents: &'a Map, - references: &mut ReferenceSlots<'a>, - children: &mut Vec<(&'a Value, Draft)>, - ) { - match self { - Draft::Draft4 => { - draft4::scan_borrowed_object_into_scratch_map(contents, self, references, children); - } - Draft::Draft6 => { - draft6::scan_borrowed_object_into_scratch_map(contents, self, references, children); - } - Draft::Draft7 => { - draft7::scan_borrowed_object_into_scratch_map(contents, self, references, children); - } - Draft::Draft201909 => draft201909::scan_borrowed_object_into_scratch_map( - contents, self, references, children, - ), - Draft::Draft202012 | Draft::Unknown => { - draft202012::scan_borrowed_object_into_scratch_map( - contents, self, references, children, - ); - } - } - } - pub(crate) fn scan_owned_object_into_scratch_map<'a>( + pub(crate) fn analyze_object_into<'a, C>( self, contents: &'a Map, - references: &mut ReferenceSlots<'a>, - children: &mut Vec>, - ) -> (Option<&'a str>, bool) { - match self { - Draft::Draft4 => { - draft4::scan_owned_object_into_scratch_map(contents, self, references, children) - } - Draft::Draft6 => { - draft6::scan_owned_object_into_scratch_map(contents, self, references, children) - } - Draft::Draft7 => { - draft7::scan_owned_object_into_scratch_map(contents, self, references, children) - } - Draft::Draft201909 => 
draft201909::scan_owned_object_into_scratch_map( - contents, self, references, children, - ), - Draft::Draft202012 | Draft::Unknown => draft202012::scan_owned_object_into_scratch_map( - contents, self, references, children, - ), - } - } - pub(crate) fn object_info(self, contents: &Map) -> ObjectInfo<'_> { - match self { - Draft::Draft4 => draft4::object_info(contents), - Draft::Draft6 => draft6::object_info(contents), - Draft::Draft7 => draft7::object_info(contents), - Draft::Draft201909 => draft201909::object_info(contents), - Draft::Draft202012 | Draft::Unknown => draft202012::object_info(contents), - } - } - pub(crate) fn walk_borrowed_subresources_map<'a, E, F>( - self, - contents: &'a Map, - f: &mut F, - ) -> Result<(), E> - where - F: FnMut(&'a Value, Draft) -> Result<(), E>, - { - match self { - Draft::Draft4 => draft4::walk_borrowed_subresources_map(contents, self, f), - Draft::Draft6 => draft6::walk_borrowed_subresources_map(contents, self, f), - Draft::Draft7 => draft7::walk_borrowed_subresources_map(contents, self, f), - Draft::Draft201909 => draft201909::walk_borrowed_subresources_map(contents, self, f), - Draft::Draft202012 | Draft::Unknown => { - draft202012::walk_borrowed_subresources_map(contents, self, f) - } - } - } - pub(crate) fn walk_owned_subresources_map<'a, E, F>( - self, - contents: &'a Map, - path: &JsonPointerNode<'_, '_>, - f: &mut F, - ) -> Result<(), E> + children: &mut C, + ) -> ObjectAnalysis<'a> where - F: FnMut(&JsonPointerNode<'_, '_>, &'a Value, Draft) -> Result<(), E>, + C: ChildBuffer<'a>, { match self { - Draft::Draft4 => draft4::walk_owned_subresources_map(contents, path, self, f), - Draft::Draft6 => draft6::walk_owned_subresources_map(contents, path, self, f), - Draft::Draft7 => draft7::walk_owned_subresources_map(contents, path, self, f), - Draft::Draft201909 => draft201909::walk_owned_subresources_map(contents, path, self, f), + Draft::Draft4 => draft4::analyze_object_into(contents, self, children), + Draft::Draft6 => 
draft6::analyze_object_into(contents, self, children), + Draft::Draft7 => draft7::analyze_object_into(contents, self, children), + Draft::Draft201909 => draft201909::analyze_object_into(contents, self, children), Draft::Draft202012 | Draft::Unknown => { - draft202012::walk_owned_subresources_map(contents, path, self, f) + draft202012::analyze_object_into(contents, self, children) } } } @@ -337,169 +247,6 @@ impl Draft { } } -fn analyze_legacy_id_object(obj: &Map) -> ObjectScan<'_> { - if obj.len() <= 3 { - return scan_legacy_id_small(obj); - } - - let raw_id = obj.get("id").and_then(Value::as_str); - let has_ref = obj.get("$ref").and_then(Value::as_str).is_some(); - let has_ref_or_schema = has_ref || obj.get("$schema").and_then(Value::as_str).is_some(); - let has_anchor = raw_id.is_some_and(|id| id.starts_with('#')); - let id = match raw_id { - Some(id) if !has_anchor && !has_ref => Some(id), - _ => None, - }; - - ObjectScan { - id, - has_anchor, - has_ref_or_schema, - } -} - -fn scan_legacy_id_small(obj: &Map) -> ObjectScan<'_> { - let mut raw_id = None; - let mut has_ref = false; - let mut has_schema = false; - - for (key, value) in obj { - match key.as_str() { - "id" => raw_id = value.as_str(), - "$ref" => has_ref = value.is_string(), - "$schema" => has_schema = value.is_string(), - _ => {} - } - } - - let has_anchor = raw_id.is_some_and(|id| id.starts_with('#')); - let id = match raw_id { - Some(id) if !has_anchor && !has_ref => Some(id), - _ => None, - }; - - ObjectScan { - id, - has_anchor, - has_ref_or_schema: has_ref || has_schema, - } -} - -fn analyze_legacy_dollar_id_object(obj: &Map) -> ObjectScan<'_> { - if obj.len() <= 3 { - return scan_legacy_dollar_id_small(obj); - } - - let raw_id = obj.get("$id").and_then(Value::as_str); - let has_ref = obj.get("$ref").and_then(Value::as_str).is_some(); - let has_ref_or_schema = has_ref || obj.get("$schema").and_then(Value::as_str).is_some(); - let has_anchor = raw_id.is_some_and(|id| id.starts_with('#')); - let id 
= match raw_id { - Some(id) if !has_anchor && !has_ref => Some(id), - _ => None, - }; - - ObjectScan { - id, - has_anchor, - has_ref_or_schema, - } -} - -fn scan_legacy_dollar_id_small(obj: &Map) -> ObjectScan<'_> { - let mut raw_id = None; - let mut has_ref = false; - let mut has_schema = false; - - for (key, value) in obj { - match key.as_str() { - "$id" => raw_id = value.as_str(), - "$ref" => has_ref = value.is_string(), - "$schema" => has_schema = value.is_string(), - _ => {} - } - } - - let has_anchor = raw_id.is_some_and(|id| id.starts_with('#')); - let id = match raw_id { - Some(id) if !has_anchor && !has_ref => Some(id), - _ => None, - }; - - ObjectScan { - id, - has_anchor, - has_ref_or_schema: has_ref || has_schema, - } -} - -fn analyze_id_and_anchor_object(obj: &Map) -> ObjectScan<'_> { - if obj.len() <= 2 { - return scan_id_and_anchor_small(obj); - } - - ObjectScan { - id: obj.get("$id").and_then(Value::as_str), - has_anchor: obj.get("$anchor").and_then(Value::as_str).is_some(), - has_ref_or_schema: has_ref_or_schema(obj), - } -} - -fn scan_id_and_anchor_small(obj: &Map) -> ObjectScan<'_> { - let mut id = None; - let mut has_anchor = false; - let mut has_ref_or_schema = false; - - for (key, value) in obj { - match key.as_str() { - "$id" => id = value.as_str(), - "$anchor" => has_anchor |= value.as_str().is_some(), - "$ref" | "$schema" => has_ref_or_schema |= value.is_string(), - _ => {} - } - } - - ObjectScan { - id, - has_anchor, - has_ref_or_schema, - } -} - -fn analyze_id_and_any_anchor_object(obj: &Map) -> ObjectScan<'_> { - if obj.len() <= 3 { - return scan_id_and_any_anchor_small(obj); - } - - ObjectScan { - id: obj.get("$id").and_then(Value::as_str), - has_anchor: obj.get("$anchor").and_then(Value::as_str).is_some() - || obj.get("$dynamicAnchor").and_then(Value::as_str).is_some(), - has_ref_or_schema: has_ref_or_schema(obj), - } -} - -fn scan_id_and_any_anchor_small(obj: &Map) -> ObjectScan<'_> { - let mut id = None; - let mut has_anchor = false; 
- let mut has_ref_or_schema = false; - - for (key, value) in obj { - match key.as_str() { - "$id" => id = value.as_str(), - "$anchor" | "$dynamicAnchor" => has_anchor |= value.as_str().is_some(), - "$ref" | "$schema" => has_ref_or_schema |= value.is_string(), - _ => {} - } - } - - ObjectScan { - id, - has_anchor, - has_ref_or_schema, - } -} - #[cfg(test)] mod tests { use crate::Draft; diff --git a/crates/jsonschema-referencing/src/registry/build.rs b/crates/jsonschema-referencing/src/registry/build.rs index c2dc9f0d..87462509 100644 --- a/crates/jsonschema-referencing/src/registry/build.rs +++ b/crates/jsonschema-referencing/src/registry/build.rs @@ -18,7 +18,7 @@ use crate::{ cache::UriCache, meta::metas_for_draft, pointer::{pointer, ParsedPointer, ParsedPointerSegment}, - spec::{ChildNode, PathSegment, ReferenceSlots}, + spec::{ChildNode, ObjectAnalysis, PathSegment, ReferenceSlots}, uri, Draft, Error, JsonPointerNode, Retrieve, }; @@ -264,23 +264,6 @@ impl TraversalCtx { } } -/// Unified scan result for a JSON object during BFS traversal. -/// -/// `has_ref_or_schema` is the fast-path flag: set from `ObjectScan.has_ref_or_schema` -/// (borrowed path) or `ref_.is_some() || schema.is_some()` (owned path). -/// `ref_` and `schema` hold actual values only for the owned path (`OwnedStrategy`); -/// they are `None` for the borrowed path where `fill_children` extracts them. -#[derive(Copy, Clone)] -struct ScannedMetadata<'v> { - id: Option<&'v str>, - has_anchor: bool, - has_ref_or_schema: bool, - /// Actual `$ref` value — `Some` only when produced by `OwnedStrategy::scan`. - ref_: Option<&'v str>, - /// Actual `$schema` value — `Some` only when produced by `OwnedStrategy::scan`. - schema: Option<&'v str>, -} - /// Controls how `explore_subtree` registers subresources and traverses children. /// /// Two concrete implementations: @@ -292,15 +275,12 @@ trait SubtreeStrategy<'v>: Copy { /// The element type stored in the child scratch buffer. 
type Child: Copy + 'v; - /// Scan `object` to extract id, anchor, and ref metadata. - fn scan(draft: Draft, object: &'v Map) -> ScannedMetadata<'v>; - - /// Return `true` when `object` contains only `$ref` / `$schema` and no child schemas. - /// Borrowed always returns `false`; owned checks `object.len() == 1`. - #[inline] - fn is_ref_only(_object: &Map) -> bool { - false - } + /// Analyze `object` once, appending children to strategy-specific scratch. + fn analyze_into( + draft: Draft, + object: &'v Map, + state: &mut ProcessingState<'v>, + ) -> ObjectAnalysis<'v>; /// Register `subschema` in the index at `key`. fn register( @@ -327,19 +307,6 @@ trait SubtreeStrategy<'v>: Copy { has_anchor: bool, ) -> bool; - /// Fill the child scratch with children of `object` and return the reference slots. - /// - /// For borrowed: calls `scan_borrowed_object_into_scratch_map` (fills ref/schema slots - /// and child list in one pass). - /// For owned: calls `scan_owned_object_into_scratch_map` (fills children; ref/schema - /// slots come from the pre-computed `scan` result). - fn fill_children( - draft: Draft, - object: &'v Map, - scan: ScannedMetadata<'v>, - state: &mut ProcessingState<'v>, - ) -> ReferenceSlots<'v>; - /// Slice of children currently in the scratch buffer. fn child_items<'s>(state: &'s ProcessingState<'v>) -> &'s [Self::Child]; @@ -358,20 +325,6 @@ trait SubtreeStrategy<'v>: Copy { resolution_cache: &mut UriCache, local_seen: &mut VisitedRefs<'v>, ) -> Result<(), Error>; - - /// Walk subresources on the no-ref path. - fn walk( - self, - draft: Draft, - object: &'v Map, - base: Arc>, - document_root: &'v Value, - document_root_uri: &Arc>, - state: &mut ProcessingState<'v>, - known_resources: &mut KnownResources, - resolution_cache: &mut UriCache, - local_seen: &mut VisitedRefs<'v>, - ) -> Result<(), Error>; } /// Strategy for schemas borrowed from the caller (zero-copy path). 
@@ -382,19 +335,14 @@ impl<'v> SubtreeStrategy<'v> for BorrowedStrategy { type Child = (&'v Value, Draft); #[inline] - fn scan(draft: Draft, object: &'v Map) -> ScannedMetadata<'v> { - let s = draft.scan_object(object); - ScannedMetadata { - id: s.id, - has_anchor: s.has_anchor, - has_ref_or_schema: s.has_ref_or_schema, - ref_: None, - schema: None, - } + fn analyze_into( + draft: Draft, + object: &'v Map, + state: &mut ProcessingState<'v>, + ) -> ObjectAnalysis<'v> { + draft.analyze_object_into(object, &mut state.borrowed_child_scratch) } - // is_ref_only: uses the default (false) - #[inline] fn register( self, @@ -418,24 +366,6 @@ impl<'v> SubtreeStrategy<'v> for BorrowedStrategy { !(is_root && new_base == root_uri) } - #[inline] - fn fill_children( - draft: Draft, - object: &'v Map, - _scan: ScannedMetadata<'v>, - state: &mut ProcessingState<'v>, - ) -> ReferenceSlots<'v> { - draft.scan_borrowed_object_into_scratch_map( - object, - &mut state.reference_scratch, - &mut state.borrowed_child_scratch, - ); - ReferenceSlots { - ref_: state.reference_scratch.ref_, - schema: state.reference_scratch.schema, - } - } - #[inline] fn child_items<'s>(state: &'s ProcessingState<'v>) -> &'s [Self::Child] { &state.borrowed_child_scratch @@ -444,8 +374,6 @@ impl<'v> SubtreeStrategy<'v> for BorrowedStrategy { #[inline] fn truncate_children(state: &mut ProcessingState<'v>, to: usize) { state.borrowed_child_scratch.truncate(to); - state.reference_scratch.ref_ = None; - state.reference_scratch.schema = None; } fn recurse( @@ -473,35 +401,6 @@ impl<'v> SubtreeStrategy<'v> for BorrowedStrategy { local_seen, ) } - - fn walk( - self, - draft: Draft, - object: &'v Map, - base: Arc>, - document_root: &'v Value, - document_root_uri: &Arc>, - state: &mut ProcessingState<'v>, - known_resources: &mut KnownResources, - resolution_cache: &mut UriCache, - local_seen: &mut VisitedRefs<'v>, - ) -> Result<(), Error> { - draft.walk_borrowed_subresources_map(object, &mut |child, child_draft| { - 
explore_subtree( - Arc::clone(&base), - document_root, - child, - child_draft, - false, - document_root_uri, - state, - known_resources, - resolution_cache, - BorrowedStrategy, - local_seen, - ) - }) - } } /// Strategy for owned documents (retrieved at runtime, stored behind `Arc`). @@ -518,20 +417,12 @@ impl<'v> SubtreeStrategy<'v> for OwnedStrategy<'v, '_, '_, '_> { type Child = ChildNode<'v>; #[inline] - fn scan(draft: Draft, object: &'v Map) -> ScannedMetadata<'v> { - let info = draft.object_info(object); - ScannedMetadata { - id: info.id, - has_anchor: info.has_anchor, - has_ref_or_schema: info.ref_.is_some() || info.schema.is_some(), - ref_: info.ref_, - schema: info.schema, - } - } - - #[inline] - fn is_ref_only(object: &Map) -> bool { - object.len() == 1 + fn analyze_into( + draft: Draft, + object: &'v Map, + state: &mut ProcessingState<'v>, + ) -> ObjectAnalysis<'v> { + draft.analyze_object_into(object, &mut state.owned_child_scratch) } #[inline] @@ -558,25 +449,6 @@ impl<'v> SubtreeStrategy<'v> for OwnedStrategy<'v, '_, '_, '_> { !(is_root && new_base == root_uri) && (changed || has_anchor) } - #[inline] - fn fill_children( - draft: Draft, - object: &'v Map, - scan: ScannedMetadata<'v>, - state: &mut ProcessingState<'v>, - ) -> ReferenceSlots<'v> { - let mut throwaway = ReferenceSlots::default(); - draft.scan_owned_object_into_scratch_map( - object, - &mut throwaway, - &mut state.owned_child_scratch, - ); - ReferenceSlots { - ref_: scan.ref_, - schema: scan.schema, - } - } - #[inline] fn child_items<'s>(state: &'s ProcessingState<'v>) -> &'s [Self::Child] { &state.owned_child_scratch @@ -617,49 +489,11 @@ impl<'v> SubtreeStrategy<'v> for OwnedStrategy<'v, '_, '_, '_> { ) }) } - - fn walk( - self, - draft: Draft, - object: &'v Map, - base: Arc>, - document_root: &'v Value, - document_root_uri: &Arc>, - state: &mut ProcessingState<'v>, - known_resources: &mut KnownResources, - resolution_cache: &mut UriCache, - local_seen: &mut VisitedRefs<'v>, - ) -> 
Result<(), Error> { - draft.walk_owned_subresources_map( - object, - self.path, - &mut |child_path, child, child_draft| { - explore_subtree( - Arc::clone(&base), - document_root, - child, - child_draft, - false, - document_root_uri, - state, - known_resources, - resolution_cache, - OwnedStrategy { - document: self.document, - path: child_path, - }, - local_seen, - ) - }, - ) - } } struct ProcessingState<'a> { queue: VecDeque, custom_metaschemas: Vec, - /// Reused scratch for `$ref`/`$schema` slot capture during both borrowed and owned traversal. - reference_scratch: ReferenceSlots<'a>, borrowed_child_scratch: Vec<(&'a Value, Draft)>, owned_child_scratch: Vec>, index: Index<'a>, @@ -671,7 +505,6 @@ impl ProcessingState<'_> { Self { queue: VecDeque::with_capacity(32), custom_metaschemas: Vec::new(), - reference_scratch: ReferenceSlots::default(), borrowed_child_scratch: Vec::new(), owned_child_scratch: Vec::new(), index: Index::default(), @@ -828,9 +661,11 @@ fn explore_subtree<'v, S: SubtreeStrategy<'v>>( let Some(object) = subschema.as_object() else { return Ok(()); }; - let scan = S::scan(draft, object); + let child_start = S::child_items(state).len(); + let analysis = S::analyze_into(draft, object, state); + let child_end = S::child_items(state).len(); - if let Some(id) = scan.id { + if let Some(id) = analysis.id { let (new_base, changed) = resolve_subresource_id(&base, id, known_resources, resolution_cache)?; base = new_base; @@ -839,79 +674,26 @@ fn explore_subtree<'v, S: SubtreeStrategy<'v>>( &base, document_root_uri, changed, - scan.has_anchor, + analysis.has_anchor, ) { strategy.register(&mut state.index, &base, draft, changed, subschema); } - } else if scan.has_anchor && !is_root_entry { + } else if analysis.has_anchor && !is_root_entry { strategy.register(&mut state.index, &base, draft, false, subschema); } - if scan.has_ref_or_schema { - if S::is_ref_only(object) { - let subschema_ptr = std::ptr::from_ref::(subschema) as usize; - if 
state.ctx.visited_schemas.insert(subschema_ptr) { - let slots = ReferenceSlots { - ref_: scan.ref_, - schema: scan.schema, - }; - collect_external_resources_from_slots( - &base, - document_root, - &slots, - &mut state.ctx, - resolution_cache, - draft, - document_root_uri, - local_seen, - )?; - } - return Ok(()); - } - - let child_start = S::child_items(state).len(); - let slots = S::fill_children(draft, object, scan, state); - let child_end = S::child_items(state).len(); - - let subschema_ptr = std::ptr::from_ref::(subschema) as usize; - if state.ctx.visited_schemas.insert(subschema_ptr) { - collect_external_resources_from_slots( - &base, - document_root, - &slots, - &mut state.ctx, - resolution_cache, - draft, - document_root_uri, - local_seen, - )?; - } - - let mut idx = child_start; - while idx < child_end { - let child = S::child_items(state)[idx]; - idx += 1; - strategy.recurse( - child, - Arc::clone(&base), - document_root, - document_root_uri, - state, - known_resources, - resolution_cache, - local_seen, - )?; - } - S::truncate_children(state, child_start); - return Ok(()); - } - let subschema_ptr = std::ptr::from_ref::(subschema) as usize; - if state.ctx.visited_schemas.insert(subschema_ptr) { - collect_external_resources( + if state.ctx.visited_schemas.insert(subschema_ptr) + && (analysis.ref_.is_some() || analysis.schema.is_some()) + { + let slots = ReferenceSlots { + ref_: analysis.ref_, + schema: analysis.schema, + }; + collect_external_resources_from_slots( &base, document_root, - subschema, + &slots, &mut state.ctx, resolution_cache, draft, @@ -919,17 +701,24 @@ fn explore_subtree<'v, S: SubtreeStrategy<'v>>( local_seen, )?; } - strategy.walk( - draft, - object, - base, - document_root, - document_root_uri, - state, - known_resources, - resolution_cache, - local_seen, - ) + + let mut idx = child_start; + while idx < child_end { + let child = S::child_items(state)[idx]; + idx += 1; + strategy.recurse( + child, + Arc::clone(&base), + document_root, + 
document_root_uri, + state, + known_resources, + resolution_cache, + local_seen, + )?; + } + S::truncate_children(state, child_start); + Ok(()) } fn explore_borrowed_subtree<'r>( diff --git a/crates/jsonschema-referencing/src/spec/draft201909.rs b/crates/jsonschema-referencing/src/spec/draft201909.rs index 838ef915..af2cf5fd 100644 --- a/crates/jsonschema-referencing/src/spec/draft201909.rs +++ b/crates/jsonschema-referencing/src/spec/draft201909.rs @@ -2,36 +2,12 @@ use serde_json::{Map, Value}; use crate::{ draft::Draft, - spec::{ChildNode, ObjectInfo, ReferenceSlots}, - Error, JsonPointerNode, Resolver, ResourceRef, Segments, + spec::{ChildBuffer, ObjectAnalysis}, + Error, Resolver, ResourceRef, Segments, }; use super::draft202012::{self, SubresourceIteratorInner}; -pub(crate) fn object_info(schema: &Map) -> ObjectInfo<'_> { - let mut id = None; - let mut has_anchor = false; - let mut ref_ = None; - let mut schema_ref = None; - - for (key, value) in schema { - match key.as_str() { - "$id" => id = value.as_str(), - "$anchor" => has_anchor |= value.as_str().is_some(), - "$ref" => ref_ = value.as_str(), - "$schema" => schema_ref = value.as_str(), - _ => {} - } - } - - ObjectInfo { - id, - has_anchor, - ref_, - schema: schema_ref, - } -} - fn visit_child<'a, E>( key: &'a str, value: &'a Value, @@ -101,63 +77,32 @@ fn visit_child<'a, E>( Ok(()) } -pub(crate) fn scan_borrowed_object_into_scratch_map<'a>( +pub(crate) fn analyze_object_into<'a, C>( schema: &'a Map, draft: Draft, - references: &mut ReferenceSlots<'a>, - children: &mut Vec<(&'a Value, Draft)>, -) { - for (key, value) in schema { - match key.as_str() { - "$ref" => { - if let Some(reference) = value.as_str() { - references.ref_ = Some(reference); - } - } - "$schema" => { - if let Some(reference) = value.as_str() { - references.schema = Some(reference); - } - } - other => { - let _ = visit_child(other, value, draft, &mut |_kw, _nk, _idx, v, d| { - children.push((v, d)); - Ok::<(), 
std::convert::Infallible>(()) - }); - } - } - } -} - -pub(crate) fn scan_owned_object_into_scratch_map<'a>( - schema: &'a Map, - draft: Draft, - references: &mut ReferenceSlots<'a>, - children: &mut Vec>, -) -> (Option<&'a str>, bool) { + children: &mut C, +) -> ObjectAnalysis<'a> +where + C: ChildBuffer<'a>, +{ let mut id = None; let mut has_anchor = false; + let mut ref_ = None; + let mut schema_ref = None; for (key, value) in schema { match key.as_str() { "$id" => id = value.as_str(), "$anchor" => has_anchor |= value.as_str().is_some(), - "$ref" => { - if let Some(reference) = value.as_str() { - references.ref_ = Some(reference); - } - } - "$schema" => { - if let Some(reference) = value.as_str() { - references.schema = Some(reference); - } - } + "$ref" => ref_ = value.as_str(), + "$schema" => schema_ref = value.as_str(), + "$dynamicAnchor" => {} other => { let _ = visit_child(other, value, draft, &mut |kw, nk, idx, v, d| { match (nk, idx) { - (Some(k), _) => children.push(ChildNode::key_key(kw, k, v, d)), - (_, Some(i)) => children.push(ChildNode::key_index(kw, i, v, d)), - _ => children.push(ChildNode::key(kw, v, d)), + (Some(k), _) => children.push_key_key(kw, k, v, d), + (_, Some(i)) => children.push_key_index(kw, i, v, d), + _ => children.push_key(kw, v, d), } Ok::<(), std::convert::Infallible>(()) }); @@ -165,51 +110,12 @@ pub(crate) fn scan_owned_object_into_scratch_map<'a>( } } - (id, has_anchor) -} - -pub(crate) fn walk_borrowed_subresources_map<'a, E, F>( - schema: &'a Map, - draft: Draft, - f: &mut F, -) -> Result<(), E> -where - F: FnMut(&'a Value, Draft) -> Result<(), E>, -{ - for (key, value) in schema { - visit_child(key.as_str(), value, draft, &mut |_kw, _nk, _idx, v, d| { - f(v, d) - })?; - } - Ok(()) -} - -pub(crate) fn walk_owned_subresources_map<'a, E, F>( - schema: &'a Map, - path: &JsonPointerNode<'_, '_>, - draft: Draft, - f: &mut F, -) -> Result<(), E> -where - F: FnMut(&JsonPointerNode<'_, '_>, &'a Value, Draft) -> Result<(), E>, -{ - for 
(key, value) in schema { - visit_child(key.as_str(), value, draft, &mut |kw, nk, idx, v, d| { - let parent = path.push(kw); - match (nk, idx) { - (Some(k), _) => { - let child_path = parent.push(k); - f(&child_path, v, d) - } - (_, Some(i)) => { - let child_path = parent.push(i); - f(&child_path, v, d) - } - _ => f(&parent, v, d), - } - })?; + ObjectAnalysis { + id, + has_anchor, + ref_, + schema: schema_ref, } - Ok(()) } pub(crate) fn object_iter<'a>( @@ -291,3 +197,40 @@ pub(crate) fn maybe_in_subresource<'r>( IN_CHILD, ) } + +#[cfg(test)] +mod tests { + use crate::Draft; + use serde_json::json; + + #[test] + fn test_analyze_object_into_2019_only_counts_plain_anchor() { + let schema = json!({ + "$anchor": "plain", + "properties": { + "name": { "type": "string" } + } + }); + + let object = schema.as_object().unwrap(); + let mut children: Vec<(&serde_json::Value, Draft)> = Vec::new(); + let analysis = Draft::Draft201909.analyze_object_into(object, &mut children); + + assert!(analysis.has_anchor); + assert_eq!(children.len(), 1); + + let dynamic_only = json!({ + "$dynamicAnchor": "ignored", + "properties": { + "name": { "type": "string" } + } + }); + + let object = dynamic_only.as_object().unwrap(); + let mut children: Vec<(&serde_json::Value, Draft)> = Vec::new(); + let analysis = Draft::Draft201909.analyze_object_into(object, &mut children); + + assert!(!analysis.has_anchor); + assert_eq!(children.len(), 1); + } +} diff --git a/crates/jsonschema-referencing/src/spec/draft202012.rs b/crates/jsonschema-referencing/src/spec/draft202012.rs index 3a3470bc..c76763de 100644 --- a/crates/jsonschema-referencing/src/spec/draft202012.rs +++ b/crates/jsonschema-referencing/src/spec/draft202012.rs @@ -6,8 +6,8 @@ use serde_json::{Map, Value}; use crate::{ draft::Draft, segments::Segment, - spec::{ChildNode, ObjectInfo, ReferenceSlots}, - Error, JsonPointerNode, Resolver, ResourceRef, Segments, + spec::{ChildBuffer, ObjectAnalysis}, + Error, Resolver, ResourceRef, Segments, }; 
fn visit_child<'a, E>( @@ -55,35 +55,14 @@ fn visit_child<'a, E>( Ok(()) } -pub(crate) fn scan_borrowed_object_into_scratch_map<'a>( +pub(crate) fn analyze_object_into<'a, C>( schema: &'a Map, draft: Draft, - references: &mut ReferenceSlots<'a>, - children: &mut Vec<(&'a Value, Draft)>, -) { - for (key, value) in schema { - match key.as_str() { - "$ref" => { - if let Some(reference) = value.as_str() { - references.ref_ = Some(reference); - } - } - "$schema" => { - if let Some(reference) = value.as_str() { - references.schema = Some(reference); - } - } - other => { - let _ = visit_child(other, value, draft, &mut |_kw, _nk, _idx, v, d| { - children.push((v, d)); - Ok::<(), std::convert::Infallible>(()) - }); - } - } - } -} - -pub(crate) fn object_info(schema: &Map) -> ObjectInfo<'_> { + children: &mut C, +) -> ObjectAnalysis<'a> +where + C: ChildBuffer<'a>, +{ let mut id = None; let mut has_anchor = false; let mut ref_ = None; @@ -95,47 +74,12 @@ pub(crate) fn object_info(schema: &Map) -> ObjectInfo<'_> { "$anchor" | "$dynamicAnchor" => has_anchor |= value.as_str().is_some(), "$ref" => ref_ = value.as_str(), "$schema" => schema_ref = value.as_str(), - _ => {} - } - } - - ObjectInfo { - id, - has_anchor, - ref_, - schema: schema_ref, - } -} - -pub(crate) fn scan_owned_object_into_scratch_map<'a>( - schema: &'a Map, - draft: Draft, - references: &mut ReferenceSlots<'a>, - children: &mut Vec>, -) -> (Option<&'a str>, bool) { - let mut id = None; - let mut has_anchor = false; - - for (key, value) in schema { - match key.as_str() { - "$id" => id = value.as_str(), - "$anchor" | "$dynamicAnchor" => has_anchor |= value.as_str().is_some(), - "$ref" => { - if let Some(reference) = value.as_str() { - references.ref_ = Some(reference); - } - } - "$schema" => { - if let Some(reference) = value.as_str() { - references.schema = Some(reference); - } - } other => { let _ = visit_child(other, value, draft, &mut |kw, nk, idx, v, d| { match (nk, idx) { - (Some(k), _) => 
children.push(ChildNode::key_key(kw, k, v, d)), - (_, Some(i)) => children.push(ChildNode::key_index(kw, i, v, d)), - _ => children.push(ChildNode::key(kw, v, d)), + (Some(k), _) => children.push_key_key(kw, k, v, d), + (_, Some(i)) => children.push_key_index(kw, i, v, d), + _ => children.push_key(kw, v, d), } Ok::<(), std::convert::Infallible>(()) }); @@ -143,51 +87,12 @@ pub(crate) fn scan_owned_object_into_scratch_map<'a>( } } - (id, has_anchor) -} - -pub(crate) fn walk_owned_subresources_map<'a, E, F>( - schema: &'a Map, - path: &JsonPointerNode<'_, '_>, - draft: Draft, - f: &mut F, -) -> Result<(), E> -where - F: FnMut(&JsonPointerNode<'_, '_>, &'a Value, Draft) -> Result<(), E>, -{ - for (key, value) in schema { - visit_child(key.as_str(), value, draft, &mut |kw, nk, idx, v, d| { - let parent = path.push(kw); - match (nk, idx) { - (Some(k), _) => { - let child_path = parent.push(k); - f(&child_path, v, d) - } - (_, Some(i)) => { - let child_path = parent.push(i); - f(&child_path, v, d) - } - _ => f(&parent, v, d), - } - })?; - } - Ok(()) -} - -pub(crate) fn walk_borrowed_subresources_map<'a, E, F>( - schema: &'a Map, - draft: Draft, - f: &mut F, -) -> Result<(), E> -where - F: FnMut(&'a Value, Draft) -> Result<(), E>, -{ - for (key, value) in schema { - visit_child(key.as_str(), value, draft, &mut |_kw, _nk, _idx, v, d| { - f(v, d) - })?; + ObjectAnalysis { + id, + has_anchor, + ref_, + schema: schema_ref, } - Ok(()) } type ObjectIter<'a> = FlatMap< @@ -527,63 +432,31 @@ mod tests { } #[test] - fn test_walk_borrowed_subresources_matches_iterator_order() { + fn test_analyze_object_into_collects_2020_12_metadata_and_children() { let schema = json!({ + "$id": "https://example.com/root", + "$dynamicAnchor": "node", + "$ref": "other.json", + "$schema": "https://json-schema.org/draft/2020-12/schema", "properties": { - "name": {"type": "string"} + "name": { "type": "string" } }, "allOf": [ - {"minimum": 1} + { "minimum": 1 } ] }); - let expected: Vec<_> = 
Draft::Draft202012 - .subresources_of(&schema) - .map(|subschema| (subschema.clone(), Draft::Draft202012.detect(subschema))) - .collect(); - let mut seen = Vec::new(); - - Draft::Draft202012 - .walk_borrowed_subresources_map( - schema.as_object().expect("schema object should be walked"), - &mut |subschema, draft| { - seen.push((subschema.clone(), draft)); - Ok::<(), ()>(()) - }, - ) - .unwrap(); - - assert_eq!(seen, expected); - } - - #[test] - fn test_walk_owned_subresources_reports_pointer_path() { - let schema = json!({ - "properties": { - "name": {"type": "string"} - } - }); - let root = crate::JsonPointerNode::new(); - let mut seen = Vec::new(); - Draft::Draft202012 - .walk_owned_subresources_map( - schema.as_object().expect("schema object should be walked"), - &root, - &mut |path, subschema, draft| { - let pointer = crate::OwnedJsonPointer::from(path); - seen.push((pointer.as_str().to_string(), subschema.clone(), draft)); - Ok::<(), ()>(()) - }, - ) - .unwrap(); + let object = schema.as_object().unwrap(); + let mut children: Vec<(&serde_json::Value, Draft)> = Vec::new(); + let analysis = Draft::Draft202012.analyze_object_into(object, &mut children); + assert_eq!(analysis.id, Some("https://example.com/root")); + assert!(analysis.has_anchor); + assert_eq!(analysis.ref_, Some("other.json")); assert_eq!( - seen, - vec![( - "/properties/name".to_string(), - json!({"type": "string"}), - Draft::Draft202012, - )] + analysis.schema, + Some("https://json-schema.org/draft/2020-12/schema") ); + assert_eq!(children.len(), 2); } } diff --git a/crates/jsonschema-referencing/src/spec/draft4.rs b/crates/jsonschema-referencing/src/spec/draft4.rs index 2c5bc44a..7002e9dc 100644 --- a/crates/jsonschema-referencing/src/spec/draft4.rs +++ b/crates/jsonschema-referencing/src/spec/draft4.rs @@ -2,40 +2,12 @@ use serde_json::{Map, Value}; use crate::{ draft::Draft, - spec::{ChildNode, ObjectInfo, ReferenceSlots}, - Error, JsonPointerNode, Resolver, ResourceRef, Segments, + 
spec::{ChildBuffer, ObjectAnalysis}, + Error, Resolver, ResourceRef, Segments, }; use super::draft202012::{self, SubresourceIteratorInner}; -pub(crate) fn object_info(schema: &Map) -> ObjectInfo<'_> { - let mut raw_id = None; - let mut ref_ = None; - let mut schema_ref = None; - - for (key, value) in schema { - match key.as_str() { - "id" => raw_id = value.as_str(), - "$ref" => ref_ = value.as_str(), - "$schema" => schema_ref = value.as_str(), - _ => {} - } - } - - let has_anchor = raw_id.is_some_and(|id| id.starts_with('#')); - let id = match raw_id { - Some(id) if !has_anchor && ref_.is_none() => Some(id), - _ => None, - }; - - ObjectInfo { - id, - has_anchor, - ref_, - schema: schema_ref, - } -} - fn visit_child<'a, E>( key: &'a str, value: &'a Value, @@ -106,63 +78,29 @@ fn visit_child<'a, E>( Ok(()) } -pub(crate) fn scan_borrowed_object_into_scratch_map<'a>( +pub(crate) fn analyze_object_into<'a, C>( schema: &'a Map, draft: Draft, - references: &mut ReferenceSlots<'a>, - children: &mut Vec<(&'a Value, Draft)>, -) { - for (key, value) in schema { - match key.as_str() { - "$ref" => { - if let Some(reference) = value.as_str() { - references.ref_ = Some(reference); - } - } - "$schema" => { - if let Some(reference) = value.as_str() { - references.schema = Some(reference); - } - } - other => { - let _ = visit_child(other, value, draft, &mut |_kw, _nk, _idx, v, d| { - children.push((v, d)); - Ok::<(), std::convert::Infallible>(()) - }); - } - } - } -} - -pub(crate) fn scan_owned_object_into_scratch_map<'a>( - schema: &'a Map, - draft: Draft, - references: &mut ReferenceSlots<'a>, - children: &mut Vec>, -) -> (Option<&'a str>, bool) { + children: &mut C, +) -> ObjectAnalysis<'a> +where + C: ChildBuffer<'a>, +{ let mut raw_id = None; - let mut has_ref = false; + let mut ref_ = None; + let mut schema_ref = None; for (key, value) in schema { match key.as_str() { "id" => raw_id = value.as_str(), - "$ref" => { - if let Some(reference) = value.as_str() { - has_ref = true; - 
references.ref_ = Some(reference); - } - } - "$schema" => { - if let Some(reference) = value.as_str() { - references.schema = Some(reference); - } - } + "$ref" => ref_ = value.as_str(), + "$schema" => schema_ref = value.as_str(), other => { let _ = visit_child(other, value, draft, &mut |kw, nk, idx, v, d| { match (nk, idx) { - (Some(k), _) => children.push(ChildNode::key_key(kw, k, v, d)), - (_, Some(i)) => children.push(ChildNode::key_index(kw, i, v, d)), - _ => children.push(ChildNode::key(kw, v, d)), + (Some(k), _) => children.push_key_key(kw, k, v, d), + (_, Some(i)) => children.push_key_index(kw, i, v, d), + _ => children.push_key(kw, v, d), } Ok::<(), std::convert::Infallible>(()) }); @@ -172,54 +110,16 @@ pub(crate) fn scan_owned_object_into_scratch_map<'a>( let has_anchor = raw_id.is_some_and(|id| id.starts_with('#')); let id = match raw_id { - Some(id) if !has_anchor && !has_ref => Some(id), + Some(id) if !has_anchor && ref_.is_none() => Some(id), _ => None, }; - (id, has_anchor) -} - -pub(crate) fn walk_borrowed_subresources_map<'a, E, F>( - schema: &'a Map, - draft: Draft, - f: &mut F, -) -> Result<(), E> -where - F: FnMut(&'a Value, Draft) -> Result<(), E>, -{ - for (key, value) in schema { - visit_child(key.as_str(), value, draft, &mut |_kw, _nk, _idx, v, d| { - f(v, d) - })?; - } - Ok(()) -} -pub(crate) fn walk_owned_subresources_map<'a, E, F>( - schema: &'a Map, - path: &JsonPointerNode<'_, '_>, - draft: Draft, - f: &mut F, -) -> Result<(), E> -where - F: FnMut(&JsonPointerNode<'_, '_>, &'a Value, Draft) -> Result<(), E>, -{ - for (key, value) in schema { - visit_child(key.as_str(), value, draft, &mut |kw, nk, idx, v, d| { - let parent = path.push(kw); - match (nk, idx) { - (Some(k), _) => { - let child_path = parent.push(k); - f(&child_path, v, d) - } - (_, Some(i)) => { - let child_path = parent.push(i); - f(&child_path, v, d) - } - _ => f(&parent, v, d), - } - })?; + ObjectAnalysis { + id, + has_anchor, + ref_, + schema: schema_ref, } - Ok(()) } 
pub(crate) fn object_iter<'a>( @@ -299,11 +199,11 @@ pub(crate) fn maybe_in_subresource<'r>( #[cfg(test)] mod tests { - use crate::{spec::ReferenceSlots, Draft}; + use crate::Draft; use serde_json::json; #[test] - fn test_scan_object_collects_control_keys() { + fn test_analyze_object_into_collects_control_keys() { let schema = json!({ "id": "http://example.com/node", "$schema": "http://example.com/meta", @@ -312,19 +212,23 @@ mod tests { }, "items": {"type": "integer"} }); - let analysis = Draft::Draft4.scan_object( + let mut children: Vec<(&serde_json::Value, Draft)> = Vec::new(); + let analysis = Draft::Draft4.analyze_object_into( schema .as_object() .expect("schema object should be analyzed"), + &mut children, ); assert_eq!(analysis.id, Some("http://example.com/node")); assert!(!analysis.has_anchor); - assert!(analysis.has_ref_or_schema); + assert_eq!(analysis.ref_, None); + assert_eq!(analysis.schema, Some("http://example.com/meta")); + assert_eq!(children.len(), 2); } #[test] - fn test_scan_borrowed_object_into_scratch_collects_refs_and_children() { + fn test_analyze_object_into_collects_refs_and_children() { let schema = json!({ "id": "http://example.com/node", "$schema": "http://example.com/meta", @@ -333,30 +237,19 @@ mod tests { }, "items": {"type": "integer"} }); - let mut references = ReferenceSlots::default(); - let mut children = Vec::new(); + let mut children: Vec<(&serde_json::Value, Draft)> = Vec::new(); - Draft::Draft4.scan_borrowed_object_into_scratch_map( + let analysis = Draft::Draft4.analyze_object_into( schema.as_object().expect("schema object should be scanned"), - &mut references, &mut children, ); assert_eq!( ( - references.ref_.map(str::to_string), - references.schema.map(str::to_string) + analysis.ref_.map(str::to_string), + analysis.schema.map(str::to_string) ), - vec![("$schema".to_string(), "http://example.com/meta".to_string())] - .into_iter() - .fold((None, None), |mut acc, (key, value)| { - if key == "$ref" { - acc.0 = Some(value); 
- } else { - acc.1 = Some(value); - } - acc - }) + (None, Some("http://example.com/meta".to_string())) ); let children: Vec<_> = children .iter() @@ -366,4 +259,22 @@ mod tests { assert!(children.contains(&(json!({"type": "string"}), Draft::Draft4))); assert!(children.contains(&(json!({"type": "integer"}), Draft::Draft4))); } + + #[test] + fn test_analyze_object_into_draft4_treats_hash_id_as_anchor() { + let schema = json!({ + "id": "#node", + "dependencies": { + "name": { "type": "string" } + } + }); + + let object = schema.as_object().unwrap(); + let mut children: Vec<(&serde_json::Value, Draft)> = Vec::new(); + let analysis = Draft::Draft4.analyze_object_into(object, &mut children); + + assert!(analysis.has_anchor); + assert_eq!(analysis.id, None); + assert_eq!(children.len(), 1); + } } diff --git a/crates/jsonschema-referencing/src/spec/draft6.rs b/crates/jsonschema-referencing/src/spec/draft6.rs index 27d89bab..be88674b 100644 --- a/crates/jsonschema-referencing/src/spec/draft6.rs +++ b/crates/jsonschema-referencing/src/spec/draft6.rs @@ -2,40 +2,12 @@ use serde_json::{Map, Value}; use crate::{ draft::Draft, - spec::{ChildNode, ObjectInfo, ReferenceSlots}, - Error, JsonPointerNode, Resolver, ResourceRef, Segments, + spec::{ChildBuffer, ObjectAnalysis}, + Error, Resolver, ResourceRef, Segments, }; use super::draft202012::{self, SubresourceIteratorInner}; -pub(crate) fn object_info(schema: &Map) -> ObjectInfo<'_> { - let mut raw_id = None; - let mut ref_ = None; - let mut schema_ref = None; - - for (key, value) in schema { - match key.as_str() { - "$id" => raw_id = value.as_str(), - "$ref" => ref_ = value.as_str(), - "$schema" => schema_ref = value.as_str(), - _ => {} - } - } - - let has_anchor = raw_id.is_some_and(|id| id.starts_with('#')); - let id = match raw_id { - Some(id) if !has_anchor && ref_.is_none() => Some(id), - _ => None, - }; - - ObjectInfo { - id, - has_anchor, - ref_, - schema: schema_ref, - } -} - fn visit_child<'a, E>( key: &'a str, value: &'a 
Value, @@ -95,63 +67,29 @@ fn visit_child<'a, E>( Ok(()) } -pub(crate) fn scan_borrowed_object_into_scratch_map<'a>( - schema: &'a Map, - draft: Draft, - references: &mut ReferenceSlots<'a>, - children: &mut Vec<(&'a Value, Draft)>, -) { - for (key, value) in schema { - match key.as_str() { - "$ref" => { - if let Some(reference) = value.as_str() { - references.ref_ = Some(reference); - } - } - "$schema" => { - if let Some(reference) = value.as_str() { - references.schema = Some(reference); - } - } - other => { - let _ = visit_child(other, value, draft, &mut |_kw, _nk, _idx, v, d| { - children.push((v, d)); - Ok::<(), std::convert::Infallible>(()) - }); - } - } - } -} - -pub(crate) fn scan_owned_object_into_scratch_map<'a>( +pub(crate) fn analyze_object_into<'a, C>( schema: &'a Map, draft: Draft, - references: &mut ReferenceSlots<'a>, - children: &mut Vec>, -) -> (Option<&'a str>, bool) { + children: &mut C, +) -> ObjectAnalysis<'a> +where + C: ChildBuffer<'a>, +{ let mut raw_id = None; - let mut has_ref = false; + let mut ref_ = None; + let mut schema_ref = None; for (key, value) in schema { match key.as_str() { "$id" => raw_id = value.as_str(), - "$ref" => { - if let Some(reference) = value.as_str() { - has_ref = true; - references.ref_ = Some(reference); - } - } - "$schema" => { - if let Some(reference) = value.as_str() { - references.schema = Some(reference); - } - } + "$ref" => ref_ = value.as_str(), + "$schema" => schema_ref = value.as_str(), other => { let _ = visit_child(other, value, draft, &mut |kw, nk, idx, v, d| { match (nk, idx) { - (Some(k), _) => children.push(ChildNode::key_key(kw, k, v, d)), - (_, Some(i)) => children.push(ChildNode::key_index(kw, i, v, d)), - _ => children.push(ChildNode::key(kw, v, d)), + (Some(k), _) => children.push_key_key(kw, k, v, d), + (_, Some(i)) => children.push_key_index(kw, i, v, d), + _ => children.push_key(kw, v, d), } Ok::<(), std::convert::Infallible>(()) }); @@ -161,55 +99,16 @@ pub(crate) fn 
scan_owned_object_into_scratch_map<'a>( let has_anchor = raw_id.is_some_and(|id| id.starts_with('#')); let id = match raw_id { - Some(id) if !has_anchor && !has_ref => Some(id), + Some(id) if !has_anchor && ref_.is_none() => Some(id), _ => None, }; - (id, has_anchor) -} - -pub(crate) fn walk_borrowed_subresources_map<'a, E, F>( - schema: &'a Map, - draft: Draft, - f: &mut F, -) -> Result<(), E> -where - F: FnMut(&'a Value, Draft) -> Result<(), E>, -{ - for (key, value) in schema { - visit_child(key.as_str(), value, draft, &mut |_kw, _nk, _idx, v, d| { - f(v, d) - })?; - } - Ok(()) -} - -pub(crate) fn walk_owned_subresources_map<'a, E, F>( - schema: &'a Map, - path: &JsonPointerNode<'_, '_>, - draft: Draft, - f: &mut F, -) -> Result<(), E> -where - F: FnMut(&JsonPointerNode<'_, '_>, &'a Value, Draft) -> Result<(), E>, -{ - for (key, value) in schema { - visit_child(key.as_str(), value, draft, &mut |kw, nk, idx, v, d| { - let parent = path.push(kw); - match (nk, idx) { - (Some(k), _) => { - let child_path = parent.push(k); - f(&child_path, v, d) - } - (_, Some(i)) => { - let child_path = parent.push(i); - f(&child_path, v, d) - } - _ => f(&parent, v, d), - } - })?; + ObjectAnalysis { + id, + has_anchor, + ref_, + schema: schema_ref, } - Ok(()) } pub(crate) fn object_iter<'a>( @@ -276,3 +175,27 @@ pub(crate) fn maybe_in_subresource<'r>( IN_CHILD, ) } + +#[cfg(test)] +mod tests { + use crate::Draft; + use serde_json::json; + + #[test] + fn test_analyze_object_into_draft6_keeps_plain_id_as_resource() { + let schema = json!({ + "$id": "child.json", + "properties": { + "name": { "type": "string" } + } + }); + + let object = schema.as_object().unwrap(); + let mut children: Vec<(&serde_json::Value, Draft)> = Vec::new(); + let analysis = Draft::Draft6.analyze_object_into(object, &mut children); + + assert_eq!(analysis.id, Some("child.json")); + assert!(!analysis.has_anchor); + assert_eq!(children.len(), 1); + } +} diff --git a/crates/jsonschema-referencing/src/spec/draft7.rs 
b/crates/jsonschema-referencing/src/spec/draft7.rs index 36b61f37..48a8faf9 100644 --- a/crates/jsonschema-referencing/src/spec/draft7.rs +++ b/crates/jsonschema-referencing/src/spec/draft7.rs @@ -2,40 +2,12 @@ use serde_json::{Map, Value}; use crate::{ draft::Draft, - spec::{ChildNode, ObjectInfo, ReferenceSlots}, - Error, JsonPointerNode, Resolver, ResourceRef, Segments, + spec::{ChildBuffer, ObjectAnalysis}, + Error, Resolver, ResourceRef, Segments, }; use super::draft202012::{self, SubresourceIteratorInner}; -pub(crate) fn object_info(schema: &Map) -> ObjectInfo<'_> { - let mut raw_id = None; - let mut ref_ = None; - let mut schema_ref = None; - - for (key, value) in schema { - match key.as_str() { - "$id" => raw_id = value.as_str(), - "$ref" => ref_ = value.as_str(), - "$schema" => schema_ref = value.as_str(), - _ => {} - } - } - - let has_anchor = raw_id.is_some_and(|id| id.starts_with('#')); - let id = match raw_id { - Some(id) if !has_anchor && ref_.is_none() => Some(id), - _ => None, - }; - - ObjectInfo { - id, - has_anchor, - ref_, - schema: schema_ref, - } -} - fn visit_child<'a, E>( key: &'a str, value: &'a Value, @@ -102,63 +74,29 @@ fn visit_child<'a, E>( Ok(()) } -pub(crate) fn scan_borrowed_object_into_scratch_map<'a>( - schema: &'a Map, - draft: Draft, - references: &mut ReferenceSlots<'a>, - children: &mut Vec<(&'a Value, Draft)>, -) { - for (key, value) in schema { - match key.as_str() { - "$ref" => { - if let Some(reference) = value.as_str() { - references.ref_ = Some(reference); - } - } - "$schema" => { - if let Some(reference) = value.as_str() { - references.schema = Some(reference); - } - } - other => { - let _ = visit_child(other, value, draft, &mut |_kw, _nk, _idx, v, d| { - children.push((v, d)); - Ok::<(), std::convert::Infallible>(()) - }); - } - } - } -} - -pub(crate) fn scan_owned_object_into_scratch_map<'a>( +pub(crate) fn analyze_object_into<'a, C>( schema: &'a Map, draft: Draft, - references: &mut ReferenceSlots<'a>, - children: 
&mut Vec>, -) -> (Option<&'a str>, bool) { + children: &mut C, +) -> ObjectAnalysis<'a> +where + C: ChildBuffer<'a>, +{ let mut raw_id = None; - let mut has_ref = false; + let mut ref_ = None; + let mut schema_ref = None; for (key, value) in schema { match key.as_str() { "$id" => raw_id = value.as_str(), - "$ref" => { - if let Some(reference) = value.as_str() { - has_ref = true; - references.ref_ = Some(reference); - } - } - "$schema" => { - if let Some(reference) = value.as_str() { - references.schema = Some(reference); - } - } + "$ref" => ref_ = value.as_str(), + "$schema" => schema_ref = value.as_str(), other => { let _ = visit_child(other, value, draft, &mut |kw, nk, idx, v, d| { match (nk, idx) { - (Some(k), _) => children.push(ChildNode::key_key(kw, k, v, d)), - (_, Some(i)) => children.push(ChildNode::key_index(kw, i, v, d)), - _ => children.push(ChildNode::key(kw, v, d)), + (Some(k), _) => children.push_key_key(kw, k, v, d), + (_, Some(i)) => children.push_key_index(kw, i, v, d), + _ => children.push_key(kw, v, d), } Ok::<(), std::convert::Infallible>(()) }); @@ -168,55 +106,16 @@ pub(crate) fn scan_owned_object_into_scratch_map<'a>( let has_anchor = raw_id.is_some_and(|id| id.starts_with('#')); let id = match raw_id { - Some(id) if !has_anchor && !has_ref => Some(id), + Some(id) if !has_anchor && ref_.is_none() => Some(id), _ => None, }; - (id, has_anchor) -} - -pub(crate) fn walk_borrowed_subresources_map<'a, E, F>( - schema: &'a Map, - draft: Draft, - f: &mut F, -) -> Result<(), E> -where - F: FnMut(&'a Value, Draft) -> Result<(), E>, -{ - for (key, value) in schema { - visit_child(key.as_str(), value, draft, &mut |_kw, _nk, _idx, v, d| { - f(v, d) - })?; - } - Ok(()) -} - -pub(crate) fn walk_owned_subresources_map<'a, E, F>( - schema: &'a Map, - path: &JsonPointerNode<'_, '_>, - draft: Draft, - f: &mut F, -) -> Result<(), E> -where - F: FnMut(&JsonPointerNode<'_, '_>, &'a Value, Draft) -> Result<(), E>, -{ - for (key, value) in schema { - 
visit_child(key.as_str(), value, draft, &mut |kw, nk, idx, v, d| { - let parent = path.push(kw); - match (nk, idx) { - (Some(k), _) => { - let child_path = parent.push(k); - f(&child_path, v, d) - } - (_, Some(i)) => { - let child_path = parent.push(i); - f(&child_path, v, d) - } - _ => f(&parent, v, d), - } - })?; + ObjectAnalysis { + id, + has_anchor, + ref_, + schema: schema_ref, } - Ok(()) } pub(crate) fn object_iter<'a>( @@ -300,3 +199,27 @@ pub(crate) fn maybe_in_subresource<'r>( IN_CHILD, ) } + +#[cfg(test)] +mod tests { + use crate::Draft; + use serde_json::json; + + #[test] + fn test_analyze_object_into_draft7_keeps_plain_id_as_resource() { + let schema = json!({ + "$id": "child.json", + "properties": { + "name": { "type": "string" } + } + }); + + let object = schema.as_object().unwrap(); + let mut children: Vec<(&serde_json::Value, Draft)> = Vec::new(); + let analysis = Draft::Draft7.analyze_object_into(object, &mut children); + + assert_eq!(analysis.id, Some("child.json")); + assert!(!analysis.has_anchor); + assert_eq!(children.len(), 1); + } +} diff --git a/crates/jsonschema-referencing/src/spec/mod.rs b/crates/jsonschema-referencing/src/spec/mod.rs index 6b2a67c3..c0048dee 100644 --- a/crates/jsonschema-referencing/src/spec/mod.rs +++ b/crates/jsonschema-referencing/src/spec/mod.rs @@ -2,13 +2,12 @@ //! //! When the registry walks a schema document, each JSON object is scanned to extract //! relevant information: -//! - [`ObjectScan`]: lightweight flags — whether the object has an `$id`, anchors, `$ref`/`$schema`. -//! - [`ObjectInfo`]: the full keyword values of `$id`, `$ref`, and `$schema`. +//! - [`ObjectAnalysis`]: shared per-object metadata for the new one-pass analyzer. //! - [`ChildNode`]: a child to process next in the BFS queue, with its path and active draft. //! //! The sub-modules contain draft-specific scanning logic that produces these types. 
-use serde_json::{Map, Value}; +use serde_json::Value; pub(crate) mod draft201909; pub(crate) mod draft202012; @@ -26,15 +25,8 @@ pub(crate) enum PathSegment<'a> { Index(usize), } -/// Lightweight scan result for a JSON object: flags for id, anchors, and $ref/$schema. -pub(crate) struct ObjectScan<'a> { - pub(crate) id: Option<&'a str>, - pub(crate) has_anchor: bool, - pub(crate) has_ref_or_schema: bool, -} - -/// Detailed keyword values extracted from a JSON object during schema processing. -pub(crate) struct ObjectInfo<'a> { +/// Shared metadata extracted from one schema object by the new analyzer path. +pub(crate) struct ObjectAnalysis<'a> { pub(crate) id: Option<&'a str>, pub(crate) has_anchor: bool, pub(crate) ref_: Option<&'a str>, @@ -87,24 +79,50 @@ impl<'a> ChildNode<'a> { } } +/// Sink for analyzer-emitted child schemas. +pub(crate) trait ChildBuffer<'a> { + fn push_key(&mut self, key: &'a str, value: &'a Value, draft: Draft); + fn push_key_index(&mut self, key: &'a str, index: usize, value: &'a Value, draft: Draft); + fn push_key_key(&mut self, key: &'a str, child_key: &'a str, value: &'a Value, draft: Draft); +} + +impl<'a> ChildBuffer<'a> for Vec<(&'a Value, Draft)> { + #[inline] + fn push_key(&mut self, _key: &'a str, value: &'a Value, draft: Draft) { + self.push((value, draft)); + } + + #[inline] + fn push_key_index(&mut self, _key: &'a str, _index: usize, value: &'a Value, draft: Draft) { + self.push((value, draft)); + } + + #[inline] + fn push_key_key(&mut self, _key: &'a str, _child_key: &'a str, value: &'a Value, draft: Draft) { + self.push((value, draft)); + } +} + +impl<'a> ChildBuffer<'a> for Vec> { + #[inline] + fn push_key(&mut self, key: &'a str, value: &'a Value, draft: Draft) { + self.push(ChildNode::key(key, value, draft)); + } + + #[inline] + fn push_key_index(&mut self, key: &'a str, index: usize, value: &'a Value, draft: Draft) { + self.push(ChildNode::key_index(key, index, value, draft)); + } + + #[inline] + fn push_key_key(&mut 
self, key: &'a str, child_key: &'a str, value: &'a Value, draft: Draft) { + self.push(ChildNode::key_key(key, child_key, value, draft)); + } +} + /// Captured `$ref` and `$schema` string values for a schema object being processed. #[derive(Default)] pub(crate) struct ReferenceSlots<'a> { pub(crate) ref_: Option<&'a str>, pub(crate) schema: Option<&'a str>, } - -#[inline] -pub(crate) fn has_ref_or_schema(schema: &Map) -> bool { - if schema.len() <= 3 { - for (key, value) in schema { - if (key == "$ref" || key == "$schema") && value.is_string() { - return true; - } - } - false - } else { - schema.get("$ref").and_then(Value::as_str).is_some() - || schema.get("$schema").and_then(Value::as_str).is_some() - } -} From a50f258bdf2e56d04f33a00f558fb8b79e6e05f5 Mon Sep 17 00:00:00 2001 From: Dmitry Dygalo Date: Tue, 7 Apr 2026 23:20:00 +0200 Subject: [PATCH 12/14] wip Signed-off-by: Dmitry Dygalo --- .../src/registry/build.rs | 54 ++++++++----------- .../src/spec/draft201909.rs | 31 +++++------ .../src/spec/draft202012.rs | 25 ++++----- .../jsonschema-referencing/src/spec/draft4.rs | 33 ++++++------ .../jsonschema-referencing/src/spec/draft6.rs | 31 +++++------ .../jsonschema-referencing/src/spec/draft7.rs | 31 +++++------ crates/jsonschema-referencing/src/spec/mod.rs | 7 --- profiler/Cargo.toml | 1 - profiler/Justfile | 4 -- 9 files changed, 101 insertions(+), 116 deletions(-) diff --git a/crates/jsonschema-referencing/src/registry/build.rs b/crates/jsonschema-referencing/src/registry/build.rs index 87462509..0b079360 100644 --- a/crates/jsonschema-referencing/src/registry/build.rs +++ b/crates/jsonschema-referencing/src/registry/build.rs @@ -12,13 +12,13 @@ use std::{borrow::Cow, collections::VecDeque, num::NonZeroUsize, sync::Arc}; use ahash::{AHashMap, AHashSet}; use fluent_uri::{pct_enc::EStr, Uri}; -use serde_json::{Map, Value}; +use serde_json::Value; use crate::{ cache::UriCache, meta::metas_for_draft, pointer::{pointer, ParsedPointer, ParsedPointerSegment}, - 
spec::{ChildNode, ObjectAnalysis, PathSegment, ReferenceSlots}, + spec::{ChildNode, PathSegment}, uri, Draft, Error, JsonPointerNode, Retrieve, }; @@ -275,12 +275,8 @@ trait SubtreeStrategy<'v>: Copy { /// The element type stored in the child scratch buffer. type Child: Copy + 'v; - /// Analyze `object` once, appending children to strategy-specific scratch. - fn analyze_into( - draft: Draft, - object: &'v Map, - state: &mut ProcessingState<'v>, - ) -> ObjectAnalysis<'v>; + /// Borrow the strategy-specific child scratch buffer. + fn child_scratch<'s>(state: &'s mut ProcessingState<'v>) -> &'s mut Vec; /// Register `subschema` in the index at `key`. fn register( @@ -335,12 +331,8 @@ impl<'v> SubtreeStrategy<'v> for BorrowedStrategy { type Child = (&'v Value, Draft); #[inline] - fn analyze_into( - draft: Draft, - object: &'v Map, - state: &mut ProcessingState<'v>, - ) -> ObjectAnalysis<'v> { - draft.analyze_object_into(object, &mut state.borrowed_child_scratch) + fn child_scratch<'s>(state: &'s mut ProcessingState<'v>) -> &'s mut Vec { + &mut state.borrowed_child_scratch } #[inline] @@ -417,12 +409,8 @@ impl<'v> SubtreeStrategy<'v> for OwnedStrategy<'v, '_, '_, '_> { type Child = ChildNode<'v>; #[inline] - fn analyze_into( - draft: Draft, - object: &'v Map, - state: &mut ProcessingState<'v>, - ) -> ObjectAnalysis<'v> { - draft.analyze_object_into(object, &mut state.owned_child_scratch) + fn child_scratch<'s>(state: &'s mut ProcessingState<'v>) -> &'s mut Vec { + &mut state.owned_child_scratch } #[inline] @@ -657,12 +645,18 @@ fn explore_subtree<'v, S: SubtreeStrategy<'v>>( resolution_cache: &mut UriCache, strategy: S, local_seen: &mut VisitedRefs<'v>, -) -> Result<(), Error> { +) -> Result<(), Error> +where + Vec: crate::spec::ChildBuffer<'v>, +{ let Some(object) = subschema.as_object() else { return Ok(()); }; let child_start = S::child_items(state).len(); - let analysis = S::analyze_into(draft, object, state); + let analysis = { + let scratch = 
S::child_scratch(state); + draft.analyze_object_into(object, scratch) + }; let child_end = S::child_items(state).len(); if let Some(id) = analysis.id { @@ -686,14 +680,11 @@ fn explore_subtree<'v, S: SubtreeStrategy<'v>>( if state.ctx.visited_schemas.insert(subschema_ptr) && (analysis.ref_.is_some() || analysis.schema.is_some()) { - let slots = ReferenceSlots { - ref_: analysis.ref_, - schema: analysis.schema, - }; - collect_external_resources_from_slots( + collect_external_resources_from_values( &base, document_root, - &slots, + analysis.ref_, + analysis.schema, &mut state.ctx, resolution_cache, draft, @@ -1209,17 +1200,18 @@ fn with_owned_child_path( } } -fn collect_external_resources_from_slots<'doc>( +fn collect_external_resources_from_values<'doc>( base: &Arc>, root: &'doc Value, - references: &ReferenceSlots<'doc>, + ref_: Option<&'doc str>, + schema: Option<&'doc str>, ctx: &mut TraversalCtx, resolution_cache: &mut UriCache, draft: Draft, doc_key: &Arc>, visited: &mut VisitedRefs<'doc>, ) -> Result<(), Error> { - for (reference, key) in [(references.ref_, "$ref"), (references.schema, "$schema")] { + for (reference, key) in [(ref_, "$ref"), (schema, "$schema")] { let Some(reference) = reference else { continue; }; diff --git a/crates/jsonschema-referencing/src/spec/draft201909.rs b/crates/jsonschema-referencing/src/spec/draft201909.rs index af2cf5fd..81316870 100644 --- a/crates/jsonschema-referencing/src/spec/draft201909.rs +++ b/crates/jsonschema-referencing/src/spec/draft201909.rs @@ -8,12 +8,12 @@ use crate::{ use super::draft202012::{self, SubresourceIteratorInner}; -fn visit_child<'a, E>( +fn for_each_child<'a>( key: &'a str, value: &'a Value, draft: Draft, - f: &mut impl FnMut(&'a str, Option<&'a str>, Option, &'a Value, Draft) -> Result<(), E>, -) -> Result<(), E> { + f: &mut impl FnMut(&'a str, Option<&'a str>, Option, &'a Value, Draft), +) { match key { "additionalItems" | "additionalProperties" @@ -26,12 +26,12 @@ fn visit_child<'a, E>( | "then" | 
"unevaluatedItems" | "unevaluatedProperties" => { - f(key, None, None, value, draft.detect(value))?; + f(key, None, None, value, draft.detect(value)); } "allOf" | "anyOf" | "oneOf" => { if let Some(arr) = value.as_array() { for (index, item) in arr.iter().enumerate() { - f(key, None, Some(index), item, draft.detect(item))?; + f(key, None, Some(index), item, draft.detect(item)); } } } @@ -44,17 +44,17 @@ fn visit_child<'a, E>( None, child_value, draft.detect(child_value), - )?; + ); } } } "items" => match value { Value::Array(arr) => { for (index, item) in arr.iter().enumerate() { - f("items", None, Some(index), item, draft.detect(item))?; + f("items", None, Some(index), item, draft.detect(item)); } } - _ => f("items", None, None, value, draft.detect(value))?, + _ => f("items", None, None, value, draft.detect(value)), }, "dependencies" => { if let Some(obj) = value.as_object() { @@ -68,13 +68,12 @@ fn visit_child<'a, E>( None, child_value, draft.detect(child_value), - )?; + ); } } } _ => {} } - Ok(()) } pub(crate) fn analyze_object_into<'a, C>( @@ -98,14 +97,16 @@ where "$schema" => schema_ref = value.as_str(), "$dynamicAnchor" => {} other => { - let _ = visit_child(other, value, draft, &mut |kw, nk, idx, v, d| { - match (nk, idx) { + for_each_child( + other, + value, + draft, + &mut |kw, nk, idx, v, d| match (nk, idx) { (Some(k), _) => children.push_key_key(kw, k, v, d), (_, Some(i)) => children.push_key_index(kw, i, v, d), _ => children.push_key(kw, v, d), - } - Ok::<(), std::convert::Infallible>(()) - }); + }, + ); } } } diff --git a/crates/jsonschema-referencing/src/spec/draft202012.rs b/crates/jsonschema-referencing/src/spec/draft202012.rs index c76763de..3819d214 100644 --- a/crates/jsonschema-referencing/src/spec/draft202012.rs +++ b/crates/jsonschema-referencing/src/spec/draft202012.rs @@ -10,12 +10,12 @@ use crate::{ Error, Resolver, ResourceRef, Segments, }; -fn visit_child<'a, E>( +fn for_each_child<'a>( key: &'a str, value: &'a Value, draft: Draft, - f: 
&mut impl FnMut(&'a str, Option<&'a str>, Option, &'a Value, Draft) -> Result<(), E>, -) -> Result<(), E> { + f: &mut impl FnMut(&'a str, Option<&'a str>, Option, &'a Value, Draft), +) { match key { "additionalProperties" | "contains" @@ -28,12 +28,12 @@ fn visit_child<'a, E>( | "then" | "unevaluatedItems" | "unevaluatedProperties" => { - f(key, None, None, value, draft.detect(value))?; + f(key, None, None, value, draft.detect(value)); } "allOf" | "anyOf" | "oneOf" | "prefixItems" => { if let Some(arr) = value.as_array() { for (index, item) in arr.iter().enumerate() { - f(key, None, Some(index), item, draft.detect(item))?; + f(key, None, Some(index), item, draft.detect(item)); } } } @@ -46,13 +46,12 @@ fn visit_child<'a, E>( None, child_value, draft.detect(child_value), - )?; + ); } } } _ => {} } - Ok(()) } pub(crate) fn analyze_object_into<'a, C>( @@ -75,14 +74,16 @@ where "$ref" => ref_ = value.as_str(), "$schema" => schema_ref = value.as_str(), other => { - let _ = visit_child(other, value, draft, &mut |kw, nk, idx, v, d| { - match (nk, idx) { + for_each_child( + other, + value, + draft, + &mut |kw, nk, idx, v, d| match (nk, idx) { (Some(k), _) => children.push_key_key(kw, k, v, d), (_, Some(i)) => children.push_key_index(kw, i, v, d), _ => children.push_key(kw, v, d), - } - Ok::<(), std::convert::Infallible>(()) - }); + }, + ); } } } diff --git a/crates/jsonschema-referencing/src/spec/draft4.rs b/crates/jsonschema-referencing/src/spec/draft4.rs index 7002e9dc..795afa28 100644 --- a/crates/jsonschema-referencing/src/spec/draft4.rs +++ b/crates/jsonschema-referencing/src/spec/draft4.rs @@ -8,15 +8,15 @@ use crate::{ use super::draft202012::{self, SubresourceIteratorInner}; -fn visit_child<'a, E>( +fn for_each_child<'a>( key: &'a str, value: &'a Value, draft: Draft, - f: &mut impl FnMut(&'a str, Option<&'a str>, Option, &'a Value, Draft) -> Result<(), E>, -) -> Result<(), E> { + f: &mut impl FnMut(&'a str, Option<&'a str>, Option, &'a Value, Draft), +) { match key 
{ "additionalItems" | "additionalProperties" if value.is_object() => { - f(key, None, None, value, draft.detect(value))?; + f(key, None, None, value, draft.detect(value)); } "contains" | "contentSchema" @@ -27,12 +27,12 @@ fn visit_child<'a, E>( | "then" | "unevaluatedItems" | "unevaluatedProperties" => { - f(key, None, None, value, draft.detect(value))?; + f(key, None, None, value, draft.detect(value)); } "allOf" | "anyOf" | "oneOf" | "prefixItems" => { if let Some(arr) = value.as_array() { for (index, item) in arr.iter().enumerate() { - f(key, None, Some(index), item, draft.detect(item))?; + f(key, None, Some(index), item, draft.detect(item)); } } } @@ -45,17 +45,17 @@ fn visit_child<'a, E>( None, child_value, draft.detect(child_value), - )?; + ); } } } "items" => match value { Value::Array(arr) => { for (index, item) in arr.iter().enumerate() { - f(key, None, Some(index), item, draft.detect(item))?; + f(key, None, Some(index), item, draft.detect(item)); } } - _ => f(key, None, None, value, draft.detect(value))?, + _ => f(key, None, None, value, draft.detect(value)), }, "dependencies" => { if let Some(obj) = value.as_object() { @@ -69,13 +69,12 @@ fn visit_child<'a, E>( None, child_value, draft.detect(child_value), - )?; + ); } } } _ => {} } - Ok(()) } pub(crate) fn analyze_object_into<'a, C>( @@ -96,14 +95,16 @@ where "$ref" => ref_ = value.as_str(), "$schema" => schema_ref = value.as_str(), other => { - let _ = visit_child(other, value, draft, &mut |kw, nk, idx, v, d| { - match (nk, idx) { + for_each_child( + other, + value, + draft, + &mut |kw, nk, idx, v, d| match (nk, idx) { (Some(k), _) => children.push_key_key(kw, k, v, d), (_, Some(i)) => children.push_key_index(kw, i, v, d), _ => children.push_key(kw, v, d), - } - Ok::<(), std::convert::Infallible>(()) - }); + }, + ); } } } diff --git a/crates/jsonschema-referencing/src/spec/draft6.rs b/crates/jsonschema-referencing/src/spec/draft6.rs index be88674b..491db8f1 100644 --- 
a/crates/jsonschema-referencing/src/spec/draft6.rs +++ b/crates/jsonschema-referencing/src/spec/draft6.rs @@ -8,20 +8,20 @@ use crate::{ use super::draft202012::{self, SubresourceIteratorInner}; -fn visit_child<'a, E>( +fn for_each_child<'a>( key: &'a str, value: &'a Value, draft: Draft, - f: &mut impl FnMut(&'a str, Option<&'a str>, Option, &'a Value, Draft) -> Result<(), E>, -) -> Result<(), E> { + f: &mut impl FnMut(&'a str, Option<&'a str>, Option, &'a Value, Draft), +) { match key { "additionalItems" | "additionalProperties" | "contains" | "not" | "propertyNames" => { - f(key, None, None, value, draft.detect(value))?; + f(key, None, None, value, draft.detect(value)); } "allOf" | "anyOf" | "oneOf" => { if let Some(arr) = value.as_array() { for (index, item) in arr.iter().enumerate() { - f(key, None, Some(index), item, draft.detect(item))?; + f(key, None, Some(index), item, draft.detect(item)); } } } @@ -34,17 +34,17 @@ fn visit_child<'a, E>( None, child_value, draft.detect(child_value), - )?; + ); } } } "items" => match value { Value::Array(arr) => { for (index, item) in arr.iter().enumerate() { - f("items", None, Some(index), item, draft.detect(item))?; + f("items", None, Some(index), item, draft.detect(item)); } } - _ => f("items", None, None, value, draft.detect(value))?, + _ => f("items", None, None, value, draft.detect(value)), }, "dependencies" => { if let Some(obj) = value.as_object() { @@ -58,13 +58,12 @@ fn visit_child<'a, E>( None, child_value, draft.detect(child_value), - )?; + ); } } } _ => {} } - Ok(()) } pub(crate) fn analyze_object_into<'a, C>( @@ -85,14 +84,16 @@ where "$ref" => ref_ = value.as_str(), "$schema" => schema_ref = value.as_str(), other => { - let _ = visit_child(other, value, draft, &mut |kw, nk, idx, v, d| { - match (nk, idx) { + for_each_child( + other, + value, + draft, + &mut |kw, nk, idx, v, d| match (nk, idx) { (Some(k), _) => children.push_key_key(kw, k, v, d), (_, Some(i)) => children.push_key_index(kw, i, v, d), _ => 
children.push_key(kw, v, d), - } - Ok::<(), std::convert::Infallible>(()) - }); + }, + ); } } } diff --git a/crates/jsonschema-referencing/src/spec/draft7.rs b/crates/jsonschema-referencing/src/spec/draft7.rs index 48a8faf9..412644d8 100644 --- a/crates/jsonschema-referencing/src/spec/draft7.rs +++ b/crates/jsonschema-referencing/src/spec/draft7.rs @@ -8,12 +8,12 @@ use crate::{ use super::draft202012::{self, SubresourceIteratorInner}; -fn visit_child<'a, E>( +fn for_each_child<'a>( key: &'a str, value: &'a Value, draft: Draft, - f: &mut impl FnMut(&'a str, Option<&'a str>, Option, &'a Value, Draft) -> Result<(), E>, -) -> Result<(), E> { + f: &mut impl FnMut(&'a str, Option<&'a str>, Option, &'a Value, Draft), +) { match key { "additionalItems" | "additionalProperties" @@ -23,12 +23,12 @@ fn visit_child<'a, E>( | "not" | "propertyNames" | "then" => { - f(key, None, None, value, draft.detect(value))?; + f(key, None, None, value, draft.detect(value)); } "allOf" | "anyOf" | "oneOf" => { if let Some(arr) = value.as_array() { for (index, item) in arr.iter().enumerate() { - f(key, None, Some(index), item, draft.detect(item))?; + f(key, None, Some(index), item, draft.detect(item)); } } } @@ -41,17 +41,17 @@ fn visit_child<'a, E>( None, child_value, draft.detect(child_value), - )?; + ); } } } "items" => match value { Value::Array(arr) => { for (index, item) in arr.iter().enumerate() { - f("items", None, Some(index), item, draft.detect(item))?; + f("items", None, Some(index), item, draft.detect(item)); } } - _ => f("items", None, None, value, draft.detect(value))?, + _ => f("items", None, None, value, draft.detect(value)), }, "dependencies" => { if let Some(obj) = value.as_object() { @@ -65,13 +65,12 @@ fn visit_child<'a, E>( None, child_value, draft.detect(child_value), - )?; + ); } } } _ => {} } - Ok(()) } pub(crate) fn analyze_object_into<'a, C>( @@ -92,14 +91,16 @@ where "$ref" => ref_ = value.as_str(), "$schema" => schema_ref = value.as_str(), other => { - let _ = 
visit_child(other, value, draft, &mut |kw, nk, idx, v, d| { - match (nk, idx) { + for_each_child( + other, + value, + draft, + &mut |kw, nk, idx, v, d| match (nk, idx) { (Some(k), _) => children.push_key_key(kw, k, v, d), (_, Some(i)) => children.push_key_index(kw, i, v, d), _ => children.push_key(kw, v, d), - } - Ok::<(), std::convert::Infallible>(()) - }); + }, + ); } } } diff --git a/crates/jsonschema-referencing/src/spec/mod.rs b/crates/jsonschema-referencing/src/spec/mod.rs index c0048dee..5ebecd7f 100644 --- a/crates/jsonschema-referencing/src/spec/mod.rs +++ b/crates/jsonschema-referencing/src/spec/mod.rs @@ -119,10 +119,3 @@ impl<'a> ChildBuffer<'a> for Vec> { self.push(ChildNode::key_key(key, child_key, value, draft)); } } - -/// Captured `$ref` and `$schema` string values for a schema object being processed. -#[derive(Default)] -pub(crate) struct ReferenceSlots<'a> { - pub(crate) ref_: Option<&'a str>, - pub(crate) schema: Option<&'a str>, -} diff --git a/profiler/Cargo.toml b/profiler/Cargo.toml index 7d207e67..5d465b2b 100644 --- a/profiler/Cargo.toml +++ b/profiler/Cargo.toml @@ -16,7 +16,6 @@ serde_json = "1" [features] dhat-heap = [] -perf-observe-registry = ["referencing/perf-observe-registry"] [profile.release] debug = true diff --git a/profiler/Justfile b/profiler/Justfile index f4029fe7..9da94897 100644 --- a/profiler/Justfile +++ b/profiler/Justfile @@ -10,10 +10,6 @@ flame preset method iterations="10000": @echo "Opening {{preset}}-{{method}}.svg in browser..." 
@xdg-open {{preset}}-{{method}}.svg 2>/dev/null || open {{preset}}-{{method}}.svg 2>/dev/null || echo "Please open {{preset}}-{{method}}.svg manually" -observe-registry preset iterations="1": - #!/bin/zsh - cargo run --release --features perf-observe-registry --package jsonschema-profiler -- --preset {{preset}} --method registry --iterations {{iterations}} | counts - # Profile with dhat using a preset dhat preset method iterations="10000": cargo run --release --features dhat-heap --package jsonschema-profiler \ From cd7038597983b1b63a76f6416aa17e1ed3c4941f Mon Sep 17 00:00:00 2001 From: Dmitry Dygalo Date: Wed, 8 Apr 2026 00:37:05 +0200 Subject: [PATCH 13/14] wip Signed-off-by: Dmitry Dygalo --- crates/jsonschema-referencing/src/draft.rs | 43 ++- .../src/registry/build.rs | 171 ++++------- .../src/spec/draft201909.rs | 210 ++++++++++--- .../src/spec/draft202012.rs | 282 ++++++++++++++++-- .../jsonschema-referencing/src/spec/draft4.rs | 214 +++++++++---- .../jsonschema-referencing/src/spec/draft6.rs | 236 ++++++++++++--- .../jsonschema-referencing/src/spec/draft7.rs | 194 +++++++++--- crates/jsonschema-referencing/src/spec/mod.rs | 41 --- 8 files changed, 1035 insertions(+), 356 deletions(-) diff --git a/crates/jsonschema-referencing/src/draft.rs b/crates/jsonschema-referencing/src/draft.rs index 6693124b..a364ea20 100644 --- a/crates/jsonschema-referencing/src/draft.rs +++ b/crates/jsonschema-referencing/src/draft.rs @@ -2,7 +2,7 @@ use serde_json::{Map, Value}; use crate::{ anchor, - spec::{self, draft201909, draft202012, draft4, draft6, draft7, ChildBuffer, ObjectAnalysis}, + spec::{self, draft201909, draft202012, draft4, draft6, draft7, ChildNode, ObjectAnalysis}, vocabularies::{VocabularySet, DRAFT_2019_09_VOCABULARIES, DRAFT_2020_12_VOCABULARIES}, Anchor, Error, Resolver, Resource, ResourceRef, Segments, }; @@ -107,21 +107,40 @@ impl Draft { None => draft202012::SubresourceIterator::Empty, } } - pub(crate) fn analyze_object_into<'a, C>( + pub(crate) fn 
analyze_object(self, contents: &Map) -> ObjectAnalysis<'_> { + match self { + Draft::Draft4 => draft4::analyze_object(contents, self), + Draft::Draft6 => draft6::analyze_object(contents, self), + Draft::Draft7 => draft7::analyze_object(contents, self), + Draft::Draft201909 => draft201909::analyze_object(contents, self), + Draft::Draft202012 | Draft::Unknown => draft202012::analyze_object(contents, self), + } + } + pub(crate) fn for_each_child<'a>( + self, + contents: &'a Map, + f: &mut impl FnMut(&'a Value, Draft) -> Result<(), Error>, + ) -> Result<(), Error> { + match self { + Draft::Draft4 => draft4::for_each_child(contents, self, f), + Draft::Draft6 => draft6::for_each_child(contents, self, f), + Draft::Draft7 => draft7::for_each_child(contents, self, f), + Draft::Draft201909 => draft201909::for_each_child(contents, self, f), + Draft::Draft202012 | Draft::Unknown => draft202012::for_each_child(contents, self, f), + } + } + pub(crate) fn for_each_owned_child<'a>( self, contents: &'a Map, - children: &mut C, - ) -> ObjectAnalysis<'a> - where - C: ChildBuffer<'a>, - { + f: &mut impl FnMut(ChildNode<'a>) -> Result<(), Error>, + ) -> Result<(), Error> { match self { - Draft::Draft4 => draft4::analyze_object_into(contents, self, children), - Draft::Draft6 => draft6::analyze_object_into(contents, self, children), - Draft::Draft7 => draft7::analyze_object_into(contents, self, children), - Draft::Draft201909 => draft201909::analyze_object_into(contents, self, children), + Draft::Draft4 => draft4::for_each_owned_child(contents, self, f), + Draft::Draft6 => draft6::for_each_owned_child(contents, self, f), + Draft::Draft7 => draft7::for_each_owned_child(contents, self, f), + Draft::Draft201909 => draft201909::for_each_owned_child(contents, self, f), Draft::Draft202012 | Draft::Unknown => { - draft202012::analyze_object_into(contents, self, children) + draft202012::for_each_owned_child(contents, self, f) } } } diff --git a/crates/jsonschema-referencing/src/registry/build.rs 
b/crates/jsonschema-referencing/src/registry/build.rs index 0b079360..a34a388d 100644 --- a/crates/jsonschema-referencing/src/registry/build.rs +++ b/crates/jsonschema-referencing/src/registry/build.rs @@ -272,12 +272,6 @@ impl TraversalCtx { /// /// Both are `Copy` so the generic function can pass the strategy by value without cloning. trait SubtreeStrategy<'v>: Copy { - /// The element type stored in the child scratch buffer. - type Child: Copy + 'v; - - /// Borrow the strategy-specific child scratch buffer. - fn child_scratch<'s>(state: &'s mut ProcessingState<'v>) -> &'s mut Vec; - /// Register `subschema` in the index at `key`. fn register( self, @@ -303,16 +297,11 @@ trait SubtreeStrategy<'v>: Copy { has_anchor: bool, ) -> bool; - /// Slice of children currently in the scratch buffer. - fn child_items<'s>(state: &'s ProcessingState<'v>) -> &'s [Self::Child]; - - /// Truncate the scratch buffer to `to` elements. - fn truncate_children(state: &mut ProcessingState<'v>, to: usize); - - /// Recurse into one child, extending the path/base as appropriate. - fn recurse( + /// Stream child nodes and recurse into them, extending the path/base as appropriate. 
+ fn walk_children( self, - child: Self::Child, + draft: Draft, + object: &'v serde_json::Map, base: Arc>, document_root: &'v Value, document_root_uri: &Arc>, @@ -328,13 +317,6 @@ trait SubtreeStrategy<'v>: Copy { struct BorrowedStrategy; impl<'v> SubtreeStrategy<'v> for BorrowedStrategy { - type Child = (&'v Value, Draft); - - #[inline] - fn child_scratch<'s>(state: &'s mut ProcessingState<'v>) -> &'s mut Vec { - &mut state.borrowed_child_scratch - } - #[inline] fn register( self, @@ -358,19 +340,10 @@ impl<'v> SubtreeStrategy<'v> for BorrowedStrategy { !(is_root && new_base == root_uri) } - #[inline] - fn child_items<'s>(state: &'s ProcessingState<'v>) -> &'s [Self::Child] { - &state.borrowed_child_scratch - } - - #[inline] - fn truncate_children(state: &mut ProcessingState<'v>, to: usize) { - state.borrowed_child_scratch.truncate(to); - } - - fn recurse( + fn walk_children( self, - (child, child_draft): (&'v Value, Draft), + draft: Draft, + object: &'v serde_json::Map, base: Arc>, document_root: &'v Value, document_root_uri: &Arc>, @@ -379,19 +352,21 @@ impl<'v> SubtreeStrategy<'v> for BorrowedStrategy { resolution_cache: &mut UriCache, local_seen: &mut VisitedRefs<'v>, ) -> Result<(), Error> { - explore_subtree( - base, - document_root, - child, - child_draft, - false, - document_root_uri, - state, - known_resources, - resolution_cache, - BorrowedStrategy, - local_seen, - ) + draft.for_each_child(object, &mut |child, child_draft| { + explore_subtree( + Arc::clone(&base), + document_root, + child, + child_draft, + false, + document_root_uri, + state, + known_resources, + resolution_cache, + BorrowedStrategy, + local_seen, + ) + }) } } @@ -406,13 +381,6 @@ struct OwnedStrategy<'v, 'doc, 'key, 'node> { } impl<'v> SubtreeStrategy<'v> for OwnedStrategy<'v, '_, '_, '_> { - type Child = ChildNode<'v>; - - #[inline] - fn child_scratch<'s>(state: &'s mut ProcessingState<'v>) -> &'s mut Vec { - &mut state.owned_child_scratch - } - #[inline] fn register( self, @@ -437,19 
+405,10 @@ impl<'v> SubtreeStrategy<'v> for OwnedStrategy<'v, '_, '_, '_> { !(is_root && new_base == root_uri) && (changed || has_anchor) } - #[inline] - fn child_items<'s>(state: &'s ProcessingState<'v>) -> &'s [Self::Child] { - &state.owned_child_scratch - } - - #[inline] - fn truncate_children(state: &mut ProcessingState<'v>, to: usize) { - state.owned_child_scratch.truncate(to); - } - - fn recurse( + fn walk_children( self, - child: ChildNode<'v>, + draft: Draft, + object: &'v serde_json::Map, base: Arc>, document_root: &'v Value, document_root_uri: &Arc>, @@ -458,23 +417,25 @@ impl<'v> SubtreeStrategy<'v> for OwnedStrategy<'v, '_, '_, '_> { resolution_cache: &mut UriCache, local_seen: &mut VisitedRefs<'v>, ) -> Result<(), Error> { - with_owned_child_path(self.path, &child, |child_path| { - explore_subtree( - base, - document_root, - child.value, - child.draft, - false, - document_root_uri, - state, - known_resources, - resolution_cache, - OwnedStrategy { - document: self.document, - path: child_path, - }, - local_seen, - ) + draft.for_each_owned_child(object, &mut |child| { + with_owned_child_path(self.path, &child, |child_path| { + explore_subtree( + Arc::clone(&base), + document_root, + child.value, + child.draft, + false, + document_root_uri, + state, + known_resources, + resolution_cache, + OwnedStrategy { + document: self.document, + path: child_path, + }, + local_seen, + ) + }) }) } } @@ -482,8 +443,6 @@ impl<'v> SubtreeStrategy<'v> for OwnedStrategy<'v, '_, '_, '_> { struct ProcessingState<'a> { queue: VecDeque, custom_metaschemas: Vec, - borrowed_child_scratch: Vec<(&'a Value, Draft)>, - owned_child_scratch: Vec>, index: Index<'a>, ctx: TraversalCtx, } @@ -493,8 +452,6 @@ impl ProcessingState<'_> { Self { queue: VecDeque::with_capacity(32), custom_metaschemas: Vec::new(), - borrowed_child_scratch: Vec::new(), - owned_child_scratch: Vec::new(), index: Index::default(), ctx: TraversalCtx::new(), } @@ -645,19 +602,11 @@ fn explore_subtree<'v, S: 
SubtreeStrategy<'v>>( resolution_cache: &mut UriCache, strategy: S, local_seen: &mut VisitedRefs<'v>, -) -> Result<(), Error> -where - Vec: crate::spec::ChildBuffer<'v>, -{ +) -> Result<(), Error> { let Some(object) = subschema.as_object() else { return Ok(()); }; - let child_start = S::child_items(state).len(); - let analysis = { - let scratch = S::child_scratch(state); - draft.analyze_object_into(object, scratch) - }; - let child_end = S::child_items(state).len(); + let analysis = draft.analyze_object(object); if let Some(id) = analysis.id { let (new_base, changed) = @@ -693,23 +642,17 @@ where )?; } - let mut idx = child_start; - while idx < child_end { - let child = S::child_items(state)[idx]; - idx += 1; - strategy.recurse( - child, - Arc::clone(&base), - document_root, - document_root_uri, - state, - known_resources, - resolution_cache, - local_seen, - )?; - } - S::truncate_children(state, child_start); - Ok(()) + strategy.walk_children( + draft, + object, + base, + document_root, + document_root_uri, + state, + known_resources, + resolution_cache, + local_seen, + ) } fn explore_borrowed_subtree<'r>( diff --git a/crates/jsonschema-referencing/src/spec/draft201909.rs b/crates/jsonschema-referencing/src/spec/draft201909.rs index 81316870..68ba4c38 100644 --- a/crates/jsonschema-referencing/src/spec/draft201909.rs +++ b/crates/jsonschema-referencing/src/spec/draft201909.rs @@ -2,18 +2,18 @@ use serde_json::{Map, Value}; use crate::{ draft::Draft, - spec::{ChildBuffer, ObjectAnalysis}, + spec::{ChildNode, ObjectAnalysis}, Error, Resolver, ResourceRef, Segments, }; use super::draft202012::{self, SubresourceIteratorInner}; -fn for_each_child<'a>( +fn visit_child<'a>( key: &'a str, value: &'a Value, draft: Draft, - f: &mut impl FnMut(&'a str, Option<&'a str>, Option, &'a Value, Draft), -) { + f: &mut impl FnMut(&'a str, Option<&'a str>, Option, &'a Value, Draft) -> Result<(), Error>, +) -> Result<(), Error> { match key { "additionalItems" | "additionalProperties" @@ 
-26,12 +26,12 @@ fn for_each_child<'a>( | "then" | "unevaluatedItems" | "unevaluatedProperties" => { - f(key, None, None, value, draft.detect(value)); + f(key, None, None, value, draft.detect(value))?; } "allOf" | "anyOf" | "oneOf" => { if let Some(arr) = value.as_array() { for (index, item) in arr.iter().enumerate() { - f(key, None, Some(index), item, draft.detect(item)); + f(key, None, Some(index), item, draft.detect(item))?; } } } @@ -44,17 +44,17 @@ fn for_each_child<'a>( None, child_value, draft.detect(child_value), - ); + )?; } } } "items" => match value { Value::Array(arr) => { for (index, item) in arr.iter().enumerate() { - f("items", None, Some(index), item, draft.detect(item)); + f("items", None, Some(index), item, draft.detect(item))?; } } - _ => f("items", None, None, value, draft.detect(value)), + _ => f("items", None, None, value, draft.detect(value))?, }, "dependencies" => { if let Some(obj) = value.as_object() { @@ -68,22 +68,60 @@ fn for_each_child<'a>( None, child_value, draft.detect(child_value), - ); + )?; } } } _ => {} } + Ok(()) } -pub(crate) fn analyze_object_into<'a, C>( +pub(crate) fn for_each_child<'a>( schema: &'a Map, draft: Draft, - children: &mut C, -) -> ObjectAnalysis<'a> -where - C: ChildBuffer<'a>, -{ + f: &mut impl FnMut(&'a Value, Draft) -> Result<(), Error>, +) -> Result<(), Error> { + for (key, value) in schema { + visit_child(key, value, draft, &mut |_, _, _, child, child_draft| { + f(child, child_draft) + })?; + } + Ok(()) +} + +pub(crate) fn for_each_owned_child<'a>( + schema: &'a Map, + draft: Draft, + f: &mut impl FnMut(ChildNode<'a>) -> Result<(), Error>, +) -> Result<(), Error> { + for (key, value) in schema { + visit_child( + key, + value, + draft, + &mut |key, child_key, index, child, child_draft| { + let node = match (child_key, index) { + (Some(child_key), None) => { + ChildNode::key_key(key, child_key, child, child_draft) + } + (None, Some(index)) => ChildNode::key_index(key, index, child, child_draft), + (None, 
None) => ChildNode::key(key, child, child_draft), + (Some(_), Some(_)) => { + unreachable!("child nodes never have both a child key and index") + } + }; + f(node) + }, + )?; + } + Ok(()) +} + +pub(crate) fn analyze_object( + schema: &Map, + _draft: Draft, +) -> ObjectAnalysis<'_> { let mut id = None; let mut has_anchor = false; let mut ref_ = None; @@ -96,18 +134,7 @@ where "$ref" => ref_ = value.as_str(), "$schema" => schema_ref = value.as_str(), "$dynamicAnchor" => {} - other => { - for_each_child( - other, - value, - draft, - &mut |kw, nk, idx, v, d| match (nk, idx) { - (Some(k), _) => children.push_key_key(kw, k, v, d), - (_, Some(i)) => children.push_key_index(kw, i, v, d), - _ => children.push_key(kw, v, d), - }, - ); - } + _ => {} } } @@ -201,12 +228,17 @@ pub(crate) fn maybe_in_subresource<'r>( #[cfg(test)] mod tests { - use crate::Draft; - use serde_json::json; + use crate::{spec::PathSegment, Draft}; + use serde_json::{json, Value}; + + use super::{for_each_child, for_each_owned_child}; #[test] - fn test_analyze_object_into_2019_only_counts_plain_anchor() { + fn test_analyze_object_2019_only_counts_plain_anchor() { let schema = json!({ + "$id": "https://example.com/root", + "$ref": "other.json", + "$schema": "https://json-schema.org/draft/2019-09/schema", "$anchor": "plain", "properties": { "name": { "type": "string" } @@ -214,11 +246,15 @@ mod tests { }); let object = schema.as_object().unwrap(); - let mut children: Vec<(&serde_json::Value, Draft)> = Vec::new(); - let analysis = Draft::Draft201909.analyze_object_into(object, &mut children); + let analysis = Draft::Draft201909.analyze_object(object); + assert_eq!(analysis.id, Some("https://example.com/root")); assert!(analysis.has_anchor); - assert_eq!(children.len(), 1); + assert_eq!(analysis.ref_, Some("other.json")); + assert_eq!( + analysis.schema, + Some("https://json-schema.org/draft/2019-09/schema") + ); let dynamic_only = json!({ "$dynamicAnchor": "ignored", @@ -228,10 +264,110 @@ mod tests { }); 
let object = dynamic_only.as_object().unwrap(); - let mut children: Vec<(&serde_json::Value, Draft)> = Vec::new(); - let analysis = Draft::Draft201909.analyze_object_into(object, &mut children); + let analysis = Draft::Draft201909.analyze_object(object); + assert_eq!(analysis.id, None); assert!(!analysis.has_anchor); - assert_eq!(children.len(), 1); + assert_eq!(analysis.ref_, None); + assert_eq!(analysis.schema, None); + } + + #[test] + fn test_for_each_owned_child_streams_expected_shapes() { + let schema = json!({ + "contentSchema": { "type": "null" }, + "allOf": [ + { "minimum": 1 } + ], + "properties": { + "name": { "type": "string" } + } + }); + + let object = schema.as_object().unwrap(); + let mut seen = Vec::new(); + + for_each_owned_child(object, Draft::Draft201909, &mut |child| { + let (shape, path) = match (child.first, child.second) { + (PathSegment::Key(key), None) => ("singleton", key.to_owned()), + (PathSegment::Key(key), Some(PathSegment::Index(index))) => { + ("key_index", format!("{key}/{index}")) + } + (PathSegment::Key(key), Some(PathSegment::Key(child_key))) => { + ("key_key", format!("{key}/{child_key}")) + } + (PathSegment::Index(_), _) => { + unreachable!("owned children always start with a key") + } + }; + + seen.push((shape.to_owned(), path, child.value.clone(), child.draft)); + Ok(()) + }) + .unwrap(); + + assert_eq!( + seen, + vec![ + ( + "key_index".to_string(), + "allOf/0".to_string(), + json!({ "minimum": 1 }), + Draft::Draft201909, + ), + ( + "singleton".to_string(), + "contentSchema".to_string(), + json!({ "type": "null" }), + Draft::Draft201909, + ), + ( + "key_key".to_string(), + "properties/name".to_string(), + json!({ "type": "string" }), + Draft::Draft201909, + ), + ] + ); + } + + #[test] + fn test_for_each_child_streams_expected_values() { + let schema = json!({ + "contentSchema": { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "type": "null" + }, + "allOf": [ + { "minimum": 1 } + ], + "properties": { + "name": 
{ "type": "string" } + } + }); + + let object = schema.as_object().unwrap(); + let mut seen: Vec<(Value, Draft)> = Vec::new(); + + for_each_child(object, Draft::Draft201909, &mut |child, draft| { + seen.push((child.clone(), draft)); + Ok(()) + }) + .unwrap(); + + assert_eq!( + seen, + vec![ + (json!({ "minimum": 1 }), Draft::Draft201909,), + ( + json!({ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "type": "null" + }), + Draft::Draft202012, + ), + (json!({ "type": "string" }), Draft::Draft201909,), + ] + ); } } diff --git a/crates/jsonschema-referencing/src/spec/draft202012.rs b/crates/jsonschema-referencing/src/spec/draft202012.rs index 3819d214..1eb05a3a 100644 --- a/crates/jsonschema-referencing/src/spec/draft202012.rs +++ b/crates/jsonschema-referencing/src/spec/draft202012.rs @@ -6,16 +6,16 @@ use serde_json::{Map, Value}; use crate::{ draft::Draft, segments::Segment, - spec::{ChildBuffer, ObjectAnalysis}, + spec::{ChildNode, ObjectAnalysis}, Error, Resolver, ResourceRef, Segments, }; -fn for_each_child<'a>( +fn visit_child<'a>( key: &'a str, value: &'a Value, draft: Draft, - f: &mut impl FnMut(&'a str, Option<&'a str>, Option, &'a Value, Draft), -) { + f: &mut impl FnMut(&'a str, Option<&'a str>, Option, &'a Value, Draft) -> Result<(), Error>, +) -> Result<(), Error> { match key { "additionalProperties" | "contains" @@ -28,12 +28,12 @@ fn for_each_child<'a>( | "then" | "unevaluatedItems" | "unevaluatedProperties" => { - f(key, None, None, value, draft.detect(value)); + f(key, None, None, value, draft.detect(value))?; } "allOf" | "anyOf" | "oneOf" | "prefixItems" => { if let Some(arr) = value.as_array() { for (index, item) in arr.iter().enumerate() { - f(key, None, Some(index), item, draft.detect(item)); + f(key, None, Some(index), item, draft.detect(item))?; } } } @@ -46,22 +46,60 @@ fn for_each_child<'a>( None, child_value, draft.detect(child_value), - ); + )?; } } } _ => {} } + Ok(()) } -pub(crate) fn analyze_object_into<'a, C>( +pub(crate) 
fn for_each_child<'a>( schema: &'a Map, draft: Draft, - children: &mut C, -) -> ObjectAnalysis<'a> -where - C: ChildBuffer<'a>, -{ + f: &mut impl FnMut(&'a Value, Draft) -> Result<(), Error>, +) -> Result<(), Error> { + for (key, value) in schema { + visit_child(key, value, draft, &mut |_, _, _, child, child_draft| { + f(child, child_draft) + })?; + } + Ok(()) +} + +pub(crate) fn for_each_owned_child<'a>( + schema: &'a Map, + draft: Draft, + f: &mut impl FnMut(ChildNode<'a>) -> Result<(), Error>, +) -> Result<(), Error> { + for (key, value) in schema { + visit_child( + key, + value, + draft, + &mut |key, child_key, index, child, child_draft| { + let node = match (child_key, index) { + (Some(child_key), None) => { + ChildNode::key_key(key, child_key, child, child_draft) + } + (None, Some(index)) => ChildNode::key_index(key, index, child, child_draft), + (None, None) => ChildNode::key(key, child, child_draft), + (Some(_), Some(_)) => { + unreachable!("child nodes never have both a child key and index") + } + }; + f(node) + }, + )?; + } + Ok(()) +} + +pub(crate) fn analyze_object( + schema: &Map, + _draft: Draft, +) -> ObjectAnalysis<'_> { let mut id = None; let mut has_anchor = false; let mut ref_ = None; @@ -73,18 +111,7 @@ where "$anchor" | "$dynamicAnchor" => has_anchor |= value.as_str().is_some(), "$ref" => ref_ = value.as_str(), "$schema" => schema_ref = value.as_str(), - other => { - for_each_child( - other, - value, - draft, - &mut |kw, nk, idx, v, d| match (nk, idx) { - (Some(k), _) => children.push_key_key(kw, k, v, d), - (_, Some(i)) => children.push_key_index(kw, i, v, d), - _ => children.push_key(kw, v, d), - }, - ); - } + _ => {} } } @@ -258,9 +285,11 @@ pub(crate) fn maybe_in_subresource_with_items_and_dependencies<'r>( #[cfg(test)] mod tests { - use crate::Draft; + use crate::{Draft, Error}; - use super::{object_iter, SubresourceIterator}; + use crate::spec::PathSegment; + + use super::{for_each_child, for_each_owned_child, object_iter, 
SubresourceIterator}; use ahash::HashSet; use serde_json::{json, Value}; use test_case::test_case; @@ -433,7 +462,7 @@ mod tests { } #[test] - fn test_analyze_object_into_collects_2020_12_metadata_and_children() { + fn test_analyze_object_collects_2020_12_metadata() { let schema = json!({ "$id": "https://example.com/root", "$dynamicAnchor": "node", @@ -448,8 +477,7 @@ mod tests { }); let object = schema.as_object().unwrap(); - let mut children: Vec<(&serde_json::Value, Draft)> = Vec::new(); - let analysis = Draft::Draft202012.analyze_object_into(object, &mut children); + let analysis = Draft::Draft202012.analyze_object(object); assert_eq!(analysis.id, Some("https://example.com/root")); assert!(analysis.has_anchor); @@ -458,6 +486,198 @@ mod tests { analysis.schema, Some("https://json-schema.org/draft/2020-12/schema") ); - assert_eq!(children.len(), 2); + } + + #[test] + fn test_analyze_object_detects_dynamic_anchor_without_id() { + let schema = json!({ + "$dynamicAnchor": "node" + }); + + let object = schema.as_object().unwrap(); + let analysis = Draft::Draft202012.analyze_object(object); + + assert_eq!(analysis.id, None); + assert!(analysis.has_anchor); + assert_eq!(analysis.ref_, None); + assert_eq!(analysis.schema, None); + } + + #[test] + fn test_for_each_owned_child_streams_mixed_child_shapes() { + let schema = json!({ + "not": { "type": "null" }, + "prefixItems": [ + { "type": "integer" } + ], + "allOf": [ + { "minimum": 1 } + ], + "properties": { + "name": { "type": "string" } + } + }); + + let object = schema.as_object().unwrap(); + let mut seen = Vec::new(); + + for_each_owned_child(object, Draft::Draft202012, &mut |child| { + let (shape, path) = match (child.first, child.second) { + (PathSegment::Key(key), None) => ("singleton", key.to_owned()), + (PathSegment::Key(key), Some(PathSegment::Index(index))) => { + ("key_index", format!("{key}/{index}")) + } + (PathSegment::Key(key), Some(PathSegment::Key(child_key))) => { + ("key_key", 
format!("{key}/{child_key}")) + } + (PathSegment::Index(_), _) => { + unreachable!("owned children always start with a key") + } + }; + + seen.push((shape.to_owned(), path, child.value.clone(), child.draft)); + Ok(()) + }) + .unwrap(); + + assert_eq!( + seen, + vec![ + ( + "key_index".to_string(), + "allOf/0".to_string(), + json!({ "minimum": 1 }), + Draft::Draft202012, + ), + ( + "singleton".to_string(), + "not".to_string(), + json!({ "type": "null" }), + Draft::Draft202012, + ), + ( + "key_index".to_string(), + "prefixItems/0".to_string(), + json!({ "type": "integer" }), + Draft::Draft202012, + ), + ( + "key_key".to_string(), + "properties/name".to_string(), + json!({ "type": "string" }), + Draft::Draft202012, + ), + ] + ); + } + + #[test] + fn test_for_each_owned_child_stops_and_returns_err() { + let schema = json!({ + "allOf": [ + { "minimum": 1 } + ], + "not": { + "$schema": "https://json-schema.org/draft/2019-09/schema", + "type": "null" + }, + "properties": { + "name": { "type": "string" } + } + }); + + let object = schema.as_object().unwrap(); + let mut seen = Vec::new(); + + let error = for_each_owned_child(object, Draft::Draft202012, &mut |child| { + let (shape, path) = match (child.first, child.second) { + (PathSegment::Key(key), None) => ("singleton", key.to_owned()), + (PathSegment::Key(key), Some(PathSegment::Index(index))) => { + ("key_index", format!("{key}/{index}")) + } + (PathSegment::Key(key), Some(PathSegment::Key(child_key))) => { + ("key_key", format!("{key}/{child_key}")) + } + (PathSegment::Index(_), _) => { + unreachable!("owned children always start with a key") + } + }; + + seen.push((shape.to_owned(), path, child.value.clone(), child.draft)); + if seen.len() == 2 { + return Err(Error::unknown_specification("stop")); + } + Ok(()) + }) + .expect_err("the callback error should stop traversal"); + + assert_eq!( + seen, + vec![ + ( + "key_index".to_string(), + "allOf/0".to_string(), + json!({ "minimum": 1 }), + Draft::Draft202012, + ), + ( + 
"singleton".to_string(), + "not".to_string(), + json!({ + "$schema": "https://json-schema.org/draft/2019-09/schema", + "type": "null" + }), + Draft::Draft201909, + ), + ] + ); + assert_eq!( + error.to_string(), + "Unknown meta-schema: 'stop'. Custom meta-schemas must be registered in the registry before use" + ); + } + + #[test] + fn test_for_each_child_streams_mixed_child_values() { + let schema = json!({ + "not": { + "$schema": "https://json-schema.org/draft/2019-09/schema", + "type": "null" + }, + "prefixItems": [ + { "type": "integer" } + ], + "allOf": [ + { "minimum": 1 } + ], + "properties": { + "name": { "type": "string" } + } + }); + + let object = schema.as_object().unwrap(); + let mut seen = Vec::new(); + + for_each_child(object, Draft::Draft202012, &mut |child, draft| { + seen.push((child.clone(), draft)); + Ok(()) + }) + .unwrap(); + + assert_eq!( + seen, + vec![ + (json!({ "minimum": 1 }), Draft::Draft202012), + ( + json!({ + "$schema": "https://json-schema.org/draft/2019-09/schema", + "type": "null" + }), + Draft::Draft201909, + ), + (json!({ "type": "integer" }), Draft::Draft202012), + (json!({ "type": "string" }), Draft::Draft202012,), + ] + ); } } diff --git a/crates/jsonschema-referencing/src/spec/draft4.rs b/crates/jsonschema-referencing/src/spec/draft4.rs index 795afa28..cb6a637a 100644 --- a/crates/jsonschema-referencing/src/spec/draft4.rs +++ b/crates/jsonschema-referencing/src/spec/draft4.rs @@ -2,21 +2,21 @@ use serde_json::{Map, Value}; use crate::{ draft::Draft, - spec::{ChildBuffer, ObjectAnalysis}, + spec::{ChildNode, ObjectAnalysis}, Error, Resolver, ResourceRef, Segments, }; use super::draft202012::{self, SubresourceIteratorInner}; -fn for_each_child<'a>( +fn visit_child<'a>( key: &'a str, value: &'a Value, draft: Draft, - f: &mut impl FnMut(&'a str, Option<&'a str>, Option, &'a Value, Draft), -) { + f: &mut impl FnMut(&'a str, Option<&'a str>, Option, &'a Value, Draft) -> Result<(), Error>, +) -> Result<(), Error> { match key { 
"additionalItems" | "additionalProperties" if value.is_object() => { - f(key, None, None, value, draft.detect(value)); + f(key, None, None, value, draft.detect(value))?; } "contains" | "contentSchema" @@ -27,12 +27,12 @@ fn for_each_child<'a>( | "then" | "unevaluatedItems" | "unevaluatedProperties" => { - f(key, None, None, value, draft.detect(value)); + f(key, None, None, value, draft.detect(value))?; } "allOf" | "anyOf" | "oneOf" | "prefixItems" => { if let Some(arr) = value.as_array() { for (index, item) in arr.iter().enumerate() { - f(key, None, Some(index), item, draft.detect(item)); + f(key, None, Some(index), item, draft.detect(item))?; } } } @@ -45,17 +45,17 @@ fn for_each_child<'a>( None, child_value, draft.detect(child_value), - ); + )?; } } } "items" => match value { Value::Array(arr) => { for (index, item) in arr.iter().enumerate() { - f(key, None, Some(index), item, draft.detect(item)); + f(key, None, Some(index), item, draft.detect(item))?; } } - _ => f(key, None, None, value, draft.detect(value)), + _ => f(key, None, None, value, draft.detect(value))?, }, "dependencies" => { if let Some(obj) = value.as_object() { @@ -69,22 +69,60 @@ fn for_each_child<'a>( None, child_value, draft.detect(child_value), - ); + )?; } } } _ => {} } + Ok(()) } -pub(crate) fn analyze_object_into<'a, C>( +pub(crate) fn for_each_child<'a>( schema: &'a Map, draft: Draft, - children: &mut C, -) -> ObjectAnalysis<'a> -where - C: ChildBuffer<'a>, -{ + f: &mut impl FnMut(&'a Value, Draft) -> Result<(), Error>, +) -> Result<(), Error> { + for (key, value) in schema { + visit_child(key, value, draft, &mut |_, _, _, child, child_draft| { + f(child, child_draft) + })?; + } + Ok(()) +} + +pub(crate) fn for_each_owned_child<'a>( + schema: &'a Map, + draft: Draft, + f: &mut impl FnMut(ChildNode<'a>) -> Result<(), Error>, +) -> Result<(), Error> { + for (key, value) in schema { + visit_child( + key, + value, + draft, + &mut |key, child_key, index, child, child_draft| { + let node = match 
(child_key, index) { + (Some(child_key), None) => { + ChildNode::key_key(key, child_key, child, child_draft) + } + (None, Some(index)) => ChildNode::key_index(key, index, child, child_draft), + (None, None) => ChildNode::key(key, child, child_draft), + (Some(_), Some(_)) => { + unreachable!("child nodes never have both a child key and index") + } + }; + f(node) + }, + )?; + } + Ok(()) +} + +pub(crate) fn analyze_object( + schema: &Map, + _draft: Draft, +) -> ObjectAnalysis<'_> { let mut raw_id = None; let mut ref_ = None; let mut schema_ref = None; @@ -94,18 +132,7 @@ where "id" => raw_id = value.as_str(), "$ref" => ref_ = value.as_str(), "$schema" => schema_ref = value.as_str(), - other => { - for_each_child( - other, - value, - draft, - &mut |kw, nk, idx, v, d| match (nk, idx) { - (Some(k), _) => children.push_key_key(kw, k, v, d), - (_, Some(i)) => children.push_key_index(kw, i, v, d), - _ => children.push_key(kw, v, d), - }, - ); - } + _ => {} } } @@ -200,11 +227,13 @@ pub(crate) fn maybe_in_subresource<'r>( #[cfg(test)] mod tests { - use crate::Draft; - use serde_json::json; + use crate::{spec::PathSegment, Draft}; + use serde_json::{json, Value}; + + use super::{for_each_child, for_each_owned_child}; #[test] - fn test_analyze_object_into_collects_control_keys() { + fn test_analyze_object_collects_control_keys() { let schema = json!({ "id": "http://example.com/node", "$schema": "http://example.com/meta", @@ -213,23 +242,20 @@ mod tests { }, "items": {"type": "integer"} }); - let mut children: Vec<(&serde_json::Value, Draft)> = Vec::new(); - let analysis = Draft::Draft4.analyze_object_into( + let analysis = Draft::Draft4.analyze_object( schema .as_object() .expect("schema object should be analyzed"), - &mut children, ); assert_eq!(analysis.id, Some("http://example.com/node")); assert!(!analysis.has_anchor); assert_eq!(analysis.ref_, None); assert_eq!(analysis.schema, Some("http://example.com/meta")); - assert_eq!(children.len(), 2); } #[test] - fn 
test_analyze_object_into_collects_refs_and_children() { + fn test_analyze_object_collects_refs_and_schema() { let schema = json!({ "id": "http://example.com/node", "$schema": "http://example.com/meta", @@ -238,12 +264,9 @@ mod tests { }, "items": {"type": "integer"} }); - let mut children: Vec<(&serde_json::Value, Draft)> = Vec::new(); - let analysis = Draft::Draft4.analyze_object_into( - schema.as_object().expect("schema object should be scanned"), - &mut children, - ); + let analysis = Draft::Draft4 + .analyze_object(schema.as_object().expect("schema object should be scanned")); assert_eq!( ( @@ -252,17 +275,10 @@ mod tests { ), (None, Some("http://example.com/meta".to_string())) ); - let children: Vec<_> = children - .iter() - .map(|(child, child_draft)| ((*child).clone(), *child_draft)) - .collect(); - assert_eq!(children.len(), 2); - assert!(children.contains(&(json!({"type": "string"}), Draft::Draft4))); - assert!(children.contains(&(json!({"type": "integer"}), Draft::Draft4))); } #[test] - fn test_analyze_object_into_draft4_treats_hash_id_as_anchor() { + fn test_analyze_object_draft4_treats_hash_id_as_anchor() { let schema = json!({ "id": "#node", "dependencies": { @@ -271,11 +287,103 @@ mod tests { }); let object = schema.as_object().unwrap(); - let mut children: Vec<(&serde_json::Value, Draft)> = Vec::new(); - let analysis = Draft::Draft4.analyze_object_into(object, &mut children); + let analysis = Draft::Draft4.analyze_object(object); assert!(analysis.has_anchor); assert_eq!(analysis.id, None); - assert_eq!(children.len(), 1); + } + + #[test] + fn test_for_each_owned_child_streams_filtered_dependency_children() { + let schema = json!({ + "items": [ + { "type": "integer" } + ], + "dependencies": { + "name": { "type": "string" }, + "flag": [ "ignored" ] + } + }); + + let object = schema.as_object().unwrap(); + let mut seen = Vec::new(); + + for_each_owned_child(object, Draft::Draft4, &mut |child| { + let (shape, path) = match (child.first, child.second) { + 
(PathSegment::Key(key), None) => ("singleton", key.to_owned()), + (PathSegment::Key(key), Some(PathSegment::Index(index))) => { + ("key_index", format!("{key}/{index}")) + } + (PathSegment::Key(key), Some(PathSegment::Key(child_key))) => { + ("key_key", format!("{key}/{child_key}")) + } + (PathSegment::Index(_), _) => { + unreachable!("owned children always start with a key") + } + }; + + seen.push((shape.to_owned(), path, child.value.clone(), child.draft)); + Ok(()) + }) + .unwrap(); + + assert_eq!( + seen, + vec![ + ( + "key_key".to_string(), + "dependencies/name".to_string(), + json!({ "type": "string" }), + Draft::Draft4, + ), + ( + "key_index".to_string(), + "items/0".to_string(), + json!({ "type": "integer" }), + Draft::Draft4, + ), + ] + ); + } + + #[test] + fn test_for_each_child_streams_expected_children() { + let schema = json!({ + "additionalProperties": { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "type": "string" + }, + "items": [ + { "type": "integer" } + ], + "dependencies": { + "name": { "type": "boolean" }, + "flag": [ "ignored" ] + } + }); + + let object = schema.as_object().unwrap(); + let mut seen: Vec<(Value, Draft)> = Vec::new(); + + for_each_child(object, Draft::Draft4, &mut |child, draft| { + seen.push((child.clone(), draft)); + Ok(()) + }) + .unwrap(); + + assert_eq!( + seen, + vec![ + ( + json!({ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "type": "string" + }), + Draft::Draft202012, + ), + (json!({ "type": "boolean" }), Draft::Draft4), + (json!({ "type": "integer" }), Draft::Draft4), + ] + ); } } diff --git a/crates/jsonschema-referencing/src/spec/draft6.rs b/crates/jsonschema-referencing/src/spec/draft6.rs index 491db8f1..6e5e77f5 100644 --- a/crates/jsonschema-referencing/src/spec/draft6.rs +++ b/crates/jsonschema-referencing/src/spec/draft6.rs @@ -2,26 +2,26 @@ use serde_json::{Map, Value}; use crate::{ draft::Draft, - spec::{ChildBuffer, ObjectAnalysis}, + spec::{ChildNode, ObjectAnalysis}, 
Error, Resolver, ResourceRef, Segments, }; use super::draft202012::{self, SubresourceIteratorInner}; -fn for_each_child<'a>( +fn visit_child<'a>( key: &'a str, value: &'a Value, draft: Draft, - f: &mut impl FnMut(&'a str, Option<&'a str>, Option, &'a Value, Draft), -) { + f: &mut impl FnMut(&'a str, Option<&'a str>, Option, &'a Value, Draft) -> Result<(), Error>, +) -> Result<(), Error> { match key { "additionalItems" | "additionalProperties" | "contains" | "not" | "propertyNames" => { - f(key, None, None, value, draft.detect(value)); + f(key, None, None, value, draft.detect(value))?; } "allOf" | "anyOf" | "oneOf" => { if let Some(arr) = value.as_array() { for (index, item) in arr.iter().enumerate() { - f(key, None, Some(index), item, draft.detect(item)); + f(key, None, Some(index), item, draft.detect(item))?; } } } @@ -34,17 +34,17 @@ fn for_each_child<'a>( None, child_value, draft.detect(child_value), - ); + )?; } } } "items" => match value { Value::Array(arr) => { for (index, item) in arr.iter().enumerate() { - f("items", None, Some(index), item, draft.detect(item)); + f("items", None, Some(index), item, draft.detect(item))?; } } - _ => f("items", None, None, value, draft.detect(value)), + _ => f("items", None, None, value, draft.detect(value))?, }, "dependencies" => { if let Some(obj) = value.as_object() { @@ -58,22 +58,60 @@ fn for_each_child<'a>( None, child_value, draft.detect(child_value), - ); + )?; } } } _ => {} } + Ok(()) } -pub(crate) fn analyze_object_into<'a, C>( +pub(crate) fn for_each_child<'a>( schema: &'a Map, draft: Draft, - children: &mut C, -) -> ObjectAnalysis<'a> -where - C: ChildBuffer<'a>, -{ + f: &mut impl FnMut(&'a Value, Draft) -> Result<(), Error>, +) -> Result<(), Error> { + for (key, value) in schema { + visit_child(key, value, draft, &mut |_, _, _, child, child_draft| { + f(child, child_draft) + })?; + } + Ok(()) +} + +pub(crate) fn for_each_owned_child<'a>( + schema: &'a Map, + draft: Draft, + f: &mut impl FnMut(ChildNode<'a>) -> 
Result<(), Error>, +) -> Result<(), Error> { + for (key, value) in schema { + visit_child( + key, + value, + draft, + &mut |key, child_key, index, child, child_draft| { + let node = match (child_key, index) { + (Some(child_key), None) => { + ChildNode::key_key(key, child_key, child, child_draft) + } + (None, Some(index)) => ChildNode::key_index(key, index, child, child_draft), + (None, None) => ChildNode::key(key, child, child_draft), + (Some(_), Some(_)) => { + unreachable!("child nodes never have both a child key and index") + } + }; + f(node) + }, + )?; + } + Ok(()) +} + +pub(crate) fn analyze_object( + schema: &Map, + _draft: Draft, +) -> ObjectAnalysis<'_> { let mut raw_id = None; let mut ref_ = None; let mut schema_ref = None; @@ -83,18 +121,7 @@ where "$id" => raw_id = value.as_str(), "$ref" => ref_ = value.as_str(), "$schema" => schema_ref = value.as_str(), - other => { - for_each_child( - other, - value, - draft, - &mut |kw, nk, idx, v, d| match (nk, idx) { - (Some(k), _) => children.push_key_key(kw, k, v, d), - (_, Some(i)) => children.push_key_index(kw, i, v, d), - _ => children.push_key(kw, v, d), - }, - ); - } + _ => {} } } @@ -179,11 +206,13 @@ pub(crate) fn maybe_in_subresource<'r>( #[cfg(test)] mod tests { - use crate::Draft; - use serde_json::json; + use crate::{spec::PathSegment, Draft, Error}; + use serde_json::{json, Value}; + + use super::{for_each_child, for_each_owned_child}; #[test] - fn test_analyze_object_into_draft6_keeps_plain_id_as_resource() { + fn test_analyze_object_draft6_keeps_plain_id_as_resource() { let schema = json!({ "$id": "child.json", "properties": { @@ -192,11 +221,150 @@ mod tests { }); let object = schema.as_object().unwrap(); - let mut children: Vec<(&serde_json::Value, Draft)> = Vec::new(); - let analysis = Draft::Draft6.analyze_object_into(object, &mut children); + let analysis = Draft::Draft6.analyze_object(object); assert_eq!(analysis.id, Some("child.json")); assert!(!analysis.has_anchor); - 
assert_eq!(children.len(), 1); + } + + #[test] + fn test_for_each_child_streams_expected_children() { + let schema = json!({ + "not": { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "type": "null" + }, + "properties": { + "name": { "type": "string" } + } + }); + + let object = schema.as_object().unwrap(); + let mut seen: Vec<(Value, Draft)> = Vec::new(); + + for_each_child(object, Draft::Draft6, &mut |child, draft| { + seen.push((child.clone(), draft)); + Ok(()) + }) + .unwrap(); + + assert_eq!( + seen, + vec![ + ( + json!({ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "type": "null" + }), + Draft::Draft202012, + ), + (json!({ "type": "string" }), Draft::Draft6), + ] + ); + } + + #[test] + fn test_for_each_owned_child_streams_expected_shapes() { + let schema = json!({ + "allOf": [ + { "minimum": 1 } + ], + "not": { "type": "null" }, + "properties": { + "name": { "type": "string" } + } + }); + + let object = schema.as_object().unwrap(); + let mut seen = Vec::new(); + + for_each_owned_child(object, Draft::Draft6, &mut |child| { + let (shape, path) = match (child.first, child.second) { + (PathSegment::Key(key), None) => ("singleton", key.to_owned()), + (PathSegment::Key(key), Some(PathSegment::Index(index))) => { + ("key_index", format!("{key}/{index}")) + } + (PathSegment::Key(key), Some(PathSegment::Key(child_key))) => { + ("key_key", format!("{key}/{child_key}")) + } + (PathSegment::Index(_), _) => { + unreachable!("owned children always start with a key") + } + }; + + seen.push((shape.to_owned(), path, child.value.clone(), child.draft)); + Ok(()) + }) + .unwrap(); + + assert_eq!( + seen, + vec![ + ( + "key_index".to_string(), + "allOf/0".to_string(), + json!({ "minimum": 1 }), + Draft::Draft6, + ), + ( + "singleton".to_string(), + "not".to_string(), + json!({ "type": "null" }), + Draft::Draft6, + ), + ( + "key_key".to_string(), + "properties/name".to_string(), + json!({ "type": "string" }), + Draft::Draft6, + ), + ] + ); + } + + 
#[test] + fn test_for_each_child_stops_and_returns_err() { + let schema = json!({ + "allOf": [ + { "minimum": 1 } + ], + "not": { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "type": "null" + }, + "properties": { + "name": { "type": "string" } + } + }); + + let object = schema.as_object().unwrap(); + let mut seen: Vec<(Value, Draft)> = Vec::new(); + + let error = for_each_child(object, Draft::Draft6, &mut |child, draft| { + seen.push((child.clone(), draft)); + if seen.len() == 2 { + return Err(Error::unknown_specification("stop")); + } + Ok(()) + }) + .expect_err("the callback error should stop traversal"); + + assert_eq!( + seen, + vec![ + (json!({ "minimum": 1 }), Draft::Draft6), + ( + json!({ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "type": "null" + }), + Draft::Draft202012, + ), + ] + ); + assert_eq!( + error.to_string(), + "Unknown meta-schema: 'stop'. Custom meta-schemas must be registered in the registry before use" + ); } } diff --git a/crates/jsonschema-referencing/src/spec/draft7.rs b/crates/jsonschema-referencing/src/spec/draft7.rs index 412644d8..42933c55 100644 --- a/crates/jsonschema-referencing/src/spec/draft7.rs +++ b/crates/jsonschema-referencing/src/spec/draft7.rs @@ -2,18 +2,18 @@ use serde_json::{Map, Value}; use crate::{ draft::Draft, - spec::{ChildBuffer, ObjectAnalysis}, + spec::{ChildNode, ObjectAnalysis}, Error, Resolver, ResourceRef, Segments, }; use super::draft202012::{self, SubresourceIteratorInner}; -fn for_each_child<'a>( +fn visit_child<'a>( key: &'a str, value: &'a Value, draft: Draft, - f: &mut impl FnMut(&'a str, Option<&'a str>, Option, &'a Value, Draft), -) { + f: &mut impl FnMut(&'a str, Option<&'a str>, Option, &'a Value, Draft) -> Result<(), Error>, +) -> Result<(), Error> { match key { "additionalItems" | "additionalProperties" @@ -23,12 +23,12 @@ fn for_each_child<'a>( | "not" | "propertyNames" | "then" => { - f(key, None, None, value, draft.detect(value)); + f(key, None, None, value, 
draft.detect(value))?; } "allOf" | "anyOf" | "oneOf" => { if let Some(arr) = value.as_array() { for (index, item) in arr.iter().enumerate() { - f(key, None, Some(index), item, draft.detect(item)); + f(key, None, Some(index), item, draft.detect(item))?; } } } @@ -41,17 +41,17 @@ fn for_each_child<'a>( None, child_value, draft.detect(child_value), - ); + )?; } } } "items" => match value { Value::Array(arr) => { for (index, item) in arr.iter().enumerate() { - f("items", None, Some(index), item, draft.detect(item)); + f("items", None, Some(index), item, draft.detect(item))?; } } - _ => f("items", None, None, value, draft.detect(value)), + _ => f("items", None, None, value, draft.detect(value))?, }, "dependencies" => { if let Some(obj) = value.as_object() { @@ -65,22 +65,60 @@ fn for_each_child<'a>( None, child_value, draft.detect(child_value), - ); + )?; } } } _ => {} } + Ok(()) } -pub(crate) fn analyze_object_into<'a, C>( +pub(crate) fn for_each_child<'a>( schema: &'a Map, draft: Draft, - children: &mut C, -) -> ObjectAnalysis<'a> -where - C: ChildBuffer<'a>, -{ + f: &mut impl FnMut(&'a Value, Draft) -> Result<(), Error>, +) -> Result<(), Error> { + for (key, value) in schema { + visit_child(key, value, draft, &mut |_, _, _, child, child_draft| { + f(child, child_draft) + })?; + } + Ok(()) +} + +pub(crate) fn for_each_owned_child<'a>( + schema: &'a Map, + draft: Draft, + f: &mut impl FnMut(ChildNode<'a>) -> Result<(), Error>, +) -> Result<(), Error> { + for (key, value) in schema { + visit_child( + key, + value, + draft, + &mut |key, child_key, index, child, child_draft| { + let node = match (child_key, index) { + (Some(child_key), None) => { + ChildNode::key_key(key, child_key, child, child_draft) + } + (None, Some(index)) => ChildNode::key_index(key, index, child, child_draft), + (None, None) => ChildNode::key(key, child, child_draft), + (Some(_), Some(_)) => { + unreachable!("child nodes never have both a child key and index") + } + }; + f(node) + }, + )?; + } + 
Ok(()) +} + +pub(crate) fn analyze_object( + schema: &Map, + _draft: Draft, +) -> ObjectAnalysis<'_> { let mut raw_id = None; let mut ref_ = None; let mut schema_ref = None; @@ -90,18 +128,7 @@ where "$id" => raw_id = value.as_str(), "$ref" => ref_ = value.as_str(), "$schema" => schema_ref = value.as_str(), - other => { - for_each_child( - other, - value, - draft, - &mut |kw, nk, idx, v, d| match (nk, idx) { - (Some(k), _) => children.push_key_key(kw, k, v, d), - (_, Some(i)) => children.push_key_index(kw, i, v, d), - _ => children.push_key(kw, v, d), - }, - ); - } + _ => {} } } @@ -203,11 +230,13 @@ pub(crate) fn maybe_in_subresource<'r>( #[cfg(test)] mod tests { - use crate::Draft; - use serde_json::json; + use crate::{spec::PathSegment, Draft}; + use serde_json::{json, Value}; + + use super::{for_each_child, for_each_owned_child}; #[test] - fn test_analyze_object_into_draft7_keeps_plain_id_as_resource() { + fn test_analyze_object_draft7_keeps_plain_id_as_resource() { let schema = json!({ "$id": "child.json", "properties": { @@ -216,11 +245,108 @@ mod tests { }); let object = schema.as_object().unwrap(); - let mut children: Vec<(&serde_json::Value, Draft)> = Vec::new(); - let analysis = Draft::Draft7.analyze_object_into(object, &mut children); + let analysis = Draft::Draft7.analyze_object(object); assert_eq!(analysis.id, Some("child.json")); assert!(!analysis.has_anchor); - assert_eq!(children.len(), 1); + } + + #[test] + fn test_for_each_owned_child_streams_expected_shapes() { + let schema = json!({ + "then": { "type": "null" }, + "allOf": [ + { "minimum": 1 } + ], + "properties": { + "name": { "type": "string" } + } + }); + + let object = schema.as_object().unwrap(); + let mut seen = Vec::new(); + + for_each_owned_child(object, Draft::Draft7, &mut |child| { + let (shape, path) = match (child.first, child.second) { + (PathSegment::Key(key), None) => ("singleton", key.to_owned()), + (PathSegment::Key(key), Some(PathSegment::Index(index))) => { + ("key_index", 
format!("{key}/{index}")) + } + (PathSegment::Key(key), Some(PathSegment::Key(child_key))) => { + ("key_key", format!("{key}/{child_key}")) + } + (PathSegment::Index(_), _) => { + unreachable!("owned children always start with a key") + } + }; + + seen.push((shape.to_owned(), path, child.value.clone(), child.draft)); + Ok(()) + }) + .unwrap(); + + assert_eq!( + seen, + vec![ + ( + "key_index".to_string(), + "allOf/0".to_string(), + json!({ "minimum": 1 }), + Draft::Draft7, + ), + ( + "key_key".to_string(), + "properties/name".to_string(), + json!({ "type": "string" }), + Draft::Draft7, + ), + ( + "singleton".to_string(), + "then".to_string(), + json!({ "type": "null" }), + Draft::Draft7, + ), + ] + ); + } + + #[test] + fn test_for_each_child_streams_expected_values() { + let schema = json!({ + "then": { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "type": "null" + }, + "allOf": [ + { "minimum": 1 } + ], + "properties": { + "name": { "type": "string" } + } + }); + + let object = schema.as_object().unwrap(); + let mut seen: Vec<(Value, Draft)> = Vec::new(); + + for_each_child(object, Draft::Draft7, &mut |child, draft| { + seen.push((child.clone(), draft)); + Ok(()) + }) + .unwrap(); + + assert_eq!( + seen, + vec![ + (json!({ "minimum": 1 }), Draft::Draft7), + (json!({ "type": "string" }), Draft::Draft7,), + ( + json!({ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "type": "null" + }), + Draft::Draft202012, + ), + ] + ); } } diff --git a/crates/jsonschema-referencing/src/spec/mod.rs b/crates/jsonschema-referencing/src/spec/mod.rs index 5ebecd7f..4427fe89 100644 --- a/crates/jsonschema-referencing/src/spec/mod.rs +++ b/crates/jsonschema-referencing/src/spec/mod.rs @@ -78,44 +78,3 @@ impl<'a> ChildNode<'a> { } } } - -/// Sink for analyzer-emitted child schemas. 
-pub(crate) trait ChildBuffer<'a> { - fn push_key(&mut self, key: &'a str, value: &'a Value, draft: Draft); - fn push_key_index(&mut self, key: &'a str, index: usize, value: &'a Value, draft: Draft); - fn push_key_key(&mut self, key: &'a str, child_key: &'a str, value: &'a Value, draft: Draft); -} - -impl<'a> ChildBuffer<'a> for Vec<(&'a Value, Draft)> { - #[inline] - fn push_key(&mut self, _key: &'a str, value: &'a Value, draft: Draft) { - self.push((value, draft)); - } - - #[inline] - fn push_key_index(&mut self, _key: &'a str, _index: usize, value: &'a Value, draft: Draft) { - self.push((value, draft)); - } - - #[inline] - fn push_key_key(&mut self, _key: &'a str, _child_key: &'a str, value: &'a Value, draft: Draft) { - self.push((value, draft)); - } -} - -impl<'a> ChildBuffer<'a> for Vec> { - #[inline] - fn push_key(&mut self, key: &'a str, value: &'a Value, draft: Draft) { - self.push(ChildNode::key(key, value, draft)); - } - - #[inline] - fn push_key_index(&mut self, key: &'a str, index: usize, value: &'a Value, draft: Draft) { - self.push(ChildNode::key_index(key, index, value, draft)); - } - - #[inline] - fn push_key_key(&mut self, key: &'a str, child_key: &'a str, value: &'a Value, draft: Draft) { - self.push(ChildNode::key_key(key, child_key, value, draft)); - } -} From 047e4412332484dce204120fec179e677dd03eb8 Mon Sep 17 00:00:00 2001 From: Dmitry Dygalo Date: Wed, 8 Apr 2026 00:50:10 +0200 Subject: [PATCH 14/14] wip Signed-off-by: Dmitry Dygalo --- .../src/registry/build.rs | 2 +- .../src/spec/draft201909.rs | 24 +++++++------------ .../src/spec/draft202012.rs | 23 +++++++----------- .../jsonschema-referencing/src/spec/draft4.rs | 23 +++++++----------- .../jsonschema-referencing/src/spec/draft6.rs | 23 +++++++----------- .../jsonschema-referencing/src/spec/draft7.rs | 23 +++++++----------- crates/jsonschema-referencing/src/spec/mod.rs | 18 ++++++++++++++ 7 files changed, 59 insertions(+), 77 deletions(-) diff --git 
a/crates/jsonschema-referencing/src/registry/build.rs b/crates/jsonschema-referencing/src/registry/build.rs index a34a388d..2d68146c 100644 --- a/crates/jsonschema-referencing/src/registry/build.rs +++ b/crates/jsonschema-referencing/src/registry/build.rs @@ -204,7 +204,7 @@ impl ReferenceKey { } } -/// Clears a [`VisitedLocalRefs`] set and reinterprets it with a different borrow lifetime, +/// Clears a [`VisitedRefs`] set and reinterprets it with a different borrow lifetime, /// reusing the backing heap allocation across processing phases. /// /// # Safety diff --git a/crates/jsonschema-referencing/src/spec/draft201909.rs b/crates/jsonschema-referencing/src/spec/draft201909.rs index 68ba4c38..a081fd36 100644 --- a/crates/jsonschema-referencing/src/spec/draft201909.rs +++ b/crates/jsonschema-referencing/src/spec/draft201909.rs @@ -101,27 +101,20 @@ pub(crate) fn for_each_owned_child<'a>( value, draft, &mut |key, child_key, index, child, child_draft| { - let node = match (child_key, index) { - (Some(child_key), None) => { - ChildNode::key_key(key, child_key, child, child_draft) - } - (None, Some(index)) => ChildNode::key_index(key, index, child, child_draft), - (None, None) => ChildNode::key(key, child, child_draft), - (Some(_), Some(_)) => { - unreachable!("child nodes never have both a child key and index") - } - }; - f(node) + f(ChildNode::from_parts( + key, + child_key, + index, + child, + child_draft, + )) }, )?; } Ok(()) } -pub(crate) fn analyze_object( - schema: &Map, - _draft: Draft, -) -> ObjectAnalysis<'_> { +pub(crate) fn analyze_object(schema: &Map, _draft: Draft) -> ObjectAnalysis<'_> { let mut id = None; let mut has_anchor = false; let mut ref_ = None; @@ -133,7 +126,6 @@ pub(crate) fn analyze_object( "$anchor" => has_anchor |= value.as_str().is_some(), "$ref" => ref_ = value.as_str(), "$schema" => schema_ref = value.as_str(), - "$dynamicAnchor" => {} _ => {} } } diff --git a/crates/jsonschema-referencing/src/spec/draft202012.rs 
b/crates/jsonschema-referencing/src/spec/draft202012.rs index 1eb05a3a..30e201e6 100644 --- a/crates/jsonschema-referencing/src/spec/draft202012.rs +++ b/crates/jsonschema-referencing/src/spec/draft202012.rs @@ -79,27 +79,20 @@ pub(crate) fn for_each_owned_child<'a>( value, draft, &mut |key, child_key, index, child, child_draft| { - let node = match (child_key, index) { - (Some(child_key), None) => { - ChildNode::key_key(key, child_key, child, child_draft) - } - (None, Some(index)) => ChildNode::key_index(key, index, child, child_draft), - (None, None) => ChildNode::key(key, child, child_draft), - (Some(_), Some(_)) => { - unreachable!("child nodes never have both a child key and index") - } - }; - f(node) + f(ChildNode::from_parts( + key, + child_key, + index, + child, + child_draft, + )) }, )?; } Ok(()) } -pub(crate) fn analyze_object( - schema: &Map, - _draft: Draft, -) -> ObjectAnalysis<'_> { +pub(crate) fn analyze_object(schema: &Map, _draft: Draft) -> ObjectAnalysis<'_> { let mut id = None; let mut has_anchor = false; let mut ref_ = None; diff --git a/crates/jsonschema-referencing/src/spec/draft4.rs b/crates/jsonschema-referencing/src/spec/draft4.rs index cb6a637a..7f6a4662 100644 --- a/crates/jsonschema-referencing/src/spec/draft4.rs +++ b/crates/jsonschema-referencing/src/spec/draft4.rs @@ -102,27 +102,20 @@ pub(crate) fn for_each_owned_child<'a>( value, draft, &mut |key, child_key, index, child, child_draft| { - let node = match (child_key, index) { - (Some(child_key), None) => { - ChildNode::key_key(key, child_key, child, child_draft) - } - (None, Some(index)) => ChildNode::key_index(key, index, child, child_draft), - (None, None) => ChildNode::key(key, child, child_draft), - (Some(_), Some(_)) => { - unreachable!("child nodes never have both a child key and index") - } - }; - f(node) + f(ChildNode::from_parts( + key, + child_key, + index, + child, + child_draft, + )) }, )?; } Ok(()) } -pub(crate) fn analyze_object( - schema: &Map, - _draft: Draft, -) -> 
ObjectAnalysis<'_> { +pub(crate) fn analyze_object(schema: &Map, _draft: Draft) -> ObjectAnalysis<'_> { let mut raw_id = None; let mut ref_ = None; let mut schema_ref = None; diff --git a/crates/jsonschema-referencing/src/spec/draft6.rs b/crates/jsonschema-referencing/src/spec/draft6.rs index 6e5e77f5..f7b1ab33 100644 --- a/crates/jsonschema-referencing/src/spec/draft6.rs +++ b/crates/jsonschema-referencing/src/spec/draft6.rs @@ -91,27 +91,20 @@ pub(crate) fn for_each_owned_child<'a>( value, draft, &mut |key, child_key, index, child, child_draft| { - let node = match (child_key, index) { - (Some(child_key), None) => { - ChildNode::key_key(key, child_key, child, child_draft) - } - (None, Some(index)) => ChildNode::key_index(key, index, child, child_draft), - (None, None) => ChildNode::key(key, child, child_draft), - (Some(_), Some(_)) => { - unreachable!("child nodes never have both a child key and index") - } - }; - f(node) + f(ChildNode::from_parts( + key, + child_key, + index, + child, + child_draft, + )) }, )?; } Ok(()) } -pub(crate) fn analyze_object( - schema: &Map, - _draft: Draft, -) -> ObjectAnalysis<'_> { +pub(crate) fn analyze_object(schema: &Map, _draft: Draft) -> ObjectAnalysis<'_> { let mut raw_id = None; let mut ref_ = None; let mut schema_ref = None; diff --git a/crates/jsonschema-referencing/src/spec/draft7.rs b/crates/jsonschema-referencing/src/spec/draft7.rs index 42933c55..ba43ceca 100644 --- a/crates/jsonschema-referencing/src/spec/draft7.rs +++ b/crates/jsonschema-referencing/src/spec/draft7.rs @@ -98,27 +98,20 @@ pub(crate) fn for_each_owned_child<'a>( value, draft, &mut |key, child_key, index, child, child_draft| { - let node = match (child_key, index) { - (Some(child_key), None) => { - ChildNode::key_key(key, child_key, child, child_draft) - } - (None, Some(index)) => ChildNode::key_index(key, index, child, child_draft), - (None, None) => ChildNode::key(key, child, child_draft), - (Some(_), Some(_)) => { - unreachable!("child nodes never 
have both a child key and index") - } - }; - f(node) + f(ChildNode::from_parts( + key, + child_key, + index, + child, + child_draft, + )) }, )?; } Ok(()) } -pub(crate) fn analyze_object( - schema: &Map, - _draft: Draft, -) -> ObjectAnalysis<'_> { +pub(crate) fn analyze_object(schema: &Map, _draft: Draft) -> ObjectAnalysis<'_> { let mut raw_id = None; let mut ref_ = None; let mut schema_ref = None; diff --git a/crates/jsonschema-referencing/src/spec/mod.rs b/crates/jsonschema-referencing/src/spec/mod.rs index 4427fe89..bb176c2e 100644 --- a/crates/jsonschema-referencing/src/spec/mod.rs +++ b/crates/jsonschema-referencing/src/spec/mod.rs @@ -77,4 +77,22 @@ impl<'a> ChildNode<'a> { draft, } } + + #[inline] + pub(crate) fn from_parts( + key: &'a str, + child_key: Option<&'a str>, + index: Option, + value: &'a Value, + draft: Draft, + ) -> Self { + match (child_key, index) { + (Some(child_key), None) => Self::key_key(key, child_key, value, draft), + (None, Some(index)) => Self::key_index(key, index, value, draft), + (None, None) => Self::key(key, value, draft), + (Some(_), Some(_)) => { + unreachable!("child nodes never have both a child key and index") + } + } + } }