diff --git a/crates/jsonschema-py/src/lib.rs b/crates/jsonschema-py/src/lib.rs index a0c03486..1dd64125 100644 --- a/crates/jsonschema-py/src/lib.rs +++ b/crates/jsonschema-py/src/lib.rs @@ -597,16 +597,16 @@ thread_local! { static LAST_FORMAT_ERROR: RefCell> = const { RefCell::new(None) }; } -fn make_options( +fn make_options<'a>( draft: Option, formats: Option<&Bound<'_, PyDict>>, validate_formats: Option, ignore_unknown_formats: Option, retriever: Option<&Bound<'_, PyAny>>, - registry: Option<®istry::Registry>, + registry: Option<&'a registry::Registry>, base_uri: Option, pattern_options: Option<&Bound<'_, PyAny>>, -) -> PyResult { +) -> PyResult> { let mut options = jsonschema::options(); if let Some(raw_draft_version) = draft { options = options.with_draft(get_draft(raw_draft_version)?); @@ -652,7 +652,7 @@ fn make_options( options = options.with_retriever(Retriever { func }); } if let Some(registry) = registry { - options = options.with_registry(registry.inner.clone()); + options = options.with_registry(®istry.inner); } if let Some(base_uri) = base_uri { options = options.with_base_uri(base_uri); @@ -1539,7 +1539,7 @@ mod meta { let schema = crate::ser::to_value(schema)?; let result = if let Some(registry) = registry { jsonschema::meta::options() - .with_registry(registry.inner.clone()) + .with_registry(®istry.inner) .validate(&schema) } else { jsonschema::meta::validate(&schema) @@ -1588,7 +1588,7 @@ mod meta { let schema = crate::ser::to_value(schema)?; let result = if let Some(registry) = registry { jsonschema::meta::options() - .with_registry(registry.inner.clone()) + .with_registry(®istry.inner) .validate(&schema) } else { jsonschema::meta::validate(&schema) diff --git a/crates/jsonschema-py/src/registry.rs b/crates/jsonschema-py/src/registry.rs index d10fde99..580e3a2e 100644 --- a/crates/jsonschema-py/src/registry.rs +++ b/crates/jsonschema-py/src/registry.rs @@ -6,7 +6,7 @@ use crate::{get_draft, retriever::into_retriever, to_value, Retriever}; /// A 
registry of JSON Schema resources, each identified by their canonical URIs. #[pyclass] pub(crate) struct Registry { - pub(crate) inner: jsonschema::Registry, + pub(crate) inner: jsonschema::Registry<'static>, } #[pymethods] diff --git a/crates/jsonschema-referencing/benches/anchor.rs b/crates/jsonschema-referencing/benches/anchor.rs index fce92538..8e3e0001 100644 --- a/crates/jsonschema-referencing/benches/anchor.rs +++ b/crates/jsonschema-referencing/benches/anchor.rs @@ -24,10 +24,13 @@ fn bench_anchor_lookup(c: &mut Criterion) { BenchmarkId::new("resolve", "small"), ®istry, |b, registry| { - let resolver = registry - .try_resolver("http://example.com/") - .expect("Invalid base URI"); - b.iter_with_large_drop(|| resolver.lookup(black_box("#foo"))); + let context = registry.context(); + b.iter_with_large_drop(|| { + let resolver = context + .try_resolver("http://example.com/") + .expect("Invalid base URI"); + resolver.lookup(black_box("#foo")) + }); }, ); diff --git a/crates/jsonschema-referencing/benches/pointer.rs b/crates/jsonschema-referencing/benches/pointer.rs index abce3047..8084de62 100644 --- a/crates/jsonschema-referencing/benches/pointer.rs +++ b/crates/jsonschema-referencing/benches/pointer.rs @@ -45,10 +45,13 @@ fn bench_pointers(c: &mut Criterion) { BenchmarkId::new("pointer", name), ®istry, |b, registry| { - let resolver = registry - .try_resolver("http://example.com/schema.json") - .expect("Invalid base URI"); - b.iter_with_large_drop(|| resolver.lookup(black_box(pointer))); + let context = registry.context(); + b.iter_with_large_drop(|| { + let resolver = context + .try_resolver("http://example.com/schema.json") + .expect("Invalid base URI"); + resolver.lookup(black_box(pointer)) + }); }, ); } diff --git a/crates/jsonschema-referencing/benches/registry.rs b/crates/jsonschema-referencing/benches/registry.rs index b8229755..142a3dbd 100644 --- a/crates/jsonschema-referencing/benches/registry.rs +++ 
b/crates/jsonschema-referencing/benches/registry.rs @@ -23,16 +23,20 @@ fn bench_subresources(c: &mut Criterion) { for (draft, data, name) in &drafts { let schema = benchmark::read_json(data); - group.bench_with_input(BenchmarkId::new("try_new", name), &schema, |b, schema| { - b.iter_batched( - || draft.create_resource(schema.clone()), - |resource| { - Registry::try_new("http://example.com/schema.json", resource) - .expect("Invalid registry input") - }, - BatchSize::SmallInput, - ); - }); + let schema_ref = draft.create_resource_ref(&schema); + group.bench_with_input( + BenchmarkId::new("try_new", name), + &schema_ref, + |b, schema_ref| { + b.iter(|| { + Registry::try_new( + "http://example.com/schema.json", + (schema_ref.contents(), schema_ref.draft()), + ) + .expect("Invalid registry input") + }); + }, + ); } let drafts = [ (Draft::Draft4, benchmark::GEOJSON, "GeoJSON"), @@ -50,14 +54,10 @@ fn bench_subresources(c: &mut Criterion) { &schema, |b, schema| { b.iter_batched( - || { - ( - draft.create_resource(schema.clone()), - SPECIFICATIONS.clone(), - ) - }, - |(resource, registry)| { - registry.try_with_resource("http://example.com/schema.json", resource) + || SPECIFICATIONS.clone(), + |registry| { + registry + .try_with_resource("http://example.com/schema.json", (schema, *draft)) }, BatchSize::SmallInput, ); diff --git a/crates/jsonschema-referencing/src/anchors/mod.rs b/crates/jsonschema-referencing/src/anchors/mod.rs index ae595306..1db3e002 100644 --- a/crates/jsonschema-referencing/src/anchors/mod.rs +++ b/crates/jsonschema-referencing/src/anchors/mod.rs @@ -90,7 +90,7 @@ impl Anchor { Anchor::Dynamic { name, resource } => { let mut last = resource; for uri in &resolver.dynamic_scope() { - match resolver.registry.anchor(uri, name.as_str()) { + match resolver.context.anchor(uri, name.as_str()) { Ok(anchor) => { if let Anchor::Dynamic { resource, .. 
} = anchor { last = resource; @@ -216,7 +216,8 @@ mod tests { let one = Draft::Draft202012.create_resource(json!({"$dynamicAnchor": "foo"})); let registry = Registry::try_new("http://example.com", one.clone()).expect("Invalid resources"); - let resolver = registry + let context = registry.context(); + let resolver = context .try_resolver("http://example.com") .expect("Invalid base URI"); let resolved = resolver.lookup("#foo").expect("Lookup failed"); @@ -249,7 +250,8 @@ mod tests { ("http://example.com/foo/bar".to_string(), root.clone()), ]) .expect("Invalid resources"); - let resolver = registry + let context = registry.context(); + let resolver = context .try_resolver("http://example.com") .expect("Invalid base URI"); @@ -290,7 +292,8 @@ mod tests { ("http://example.com/foo/bar".to_string(), two.clone()), ]) .expect("Invalid resources"); - let resolver = registry + let context = registry.context(); + let resolver = context .try_resolver("http://example.com") .expect("Invalid base URI"); @@ -312,7 +315,8 @@ mod tests { } })); let registry = Registry::try_new("http://example.com", schema).expect("Invalid resources"); - let resolver = registry + let context = registry.context(); + let resolver = context .try_resolver("http://example.com") .expect("Invalid base URI"); @@ -331,7 +335,8 @@ mod tests { } })); let registry = Registry::try_new("http://example.com", schema).expect("Invalid resources"); - let resolver = registry + let context = registry.context(); + let resolver = context .try_resolver("http://example.com") .expect("Invalid base URI"); @@ -347,7 +352,8 @@ mod tests { let one = Draft::Draft201909.create_resource(json!({"$recursiveAnchor": true})); let registry = Registry::try_new("http://example.com", one.clone()).expect("Invalid resources"); - let resolver = registry + let context = registry.context(); + let resolver = context .try_resolver("http://example.com") .expect("Invalid base URI"); let first = resolver.lookup("").expect("Lookup failed"); @@ -363,7 
+369,8 @@ mod tests { let true_resource = Draft::Draft201909.create_resource(json!(true)); let registry = Registry::try_new("http://example.com", true_resource.clone()) .expect("Invalid resources"); - let resolver = registry + let context = registry.context(); + let resolver = context .try_resolver("http://example.com") .expect("Invalid base URI"); let resolved = resolver.lookup_recursive_ref().expect("Lookup failed"); @@ -398,7 +405,8 @@ mod tests { ]) .expect("Invalid resources"); - let resolver = registry + let context = registry.context(); + let resolver = context .try_resolver("http://example.com") .expect("Invalid base URI"); let first = resolver.lookup("").expect("Lookup failed"); @@ -440,7 +448,8 @@ mod tests { ]) .expect("Invalid resources"); - let resolver = registry + let context = registry.context(); + let resolver = context .try_resolver("http://example.com") .expect("Invalid base URI"); let first = resolver.lookup("").expect("Lookup failed"); diff --git a/crates/jsonschema-referencing/src/builder.rs b/crates/jsonschema-referencing/src/builder.rs new file mode 100644 index 00000000..7e93c17a --- /dev/null +++ b/crates/jsonschema-referencing/src/builder.rs @@ -0,0 +1,467 @@ +//! Builder for constructing Registry instances with borrowed or owned documents. + +use crate::{registry::DocumentVec, uri, Draft, Error, IntoDocument, Retrieve}; +use ahash::AHashMap; +use fluent_uri::Uri; +use serde_json::Value; +use std::{borrow::Cow, sync::Arc}; + +/// Builder for creating a [`Registry`](crate::Registry). +/// +/// The builder pattern ensures that all documents are collected before crawling +/// external references, and provides a clean API for both borrowed and owned schemas. 
+/// +/// # Examples +/// +/// ```rust +/// use referencing::Registry; +/// use serde_json::json; +/// +/// # fn example() -> Result<(), Box> { +/// let schema1 = json!({"type": "string"}); +/// let schema2 = json!({"type": "number"}); +/// +/// // Borrowed schemas (zero-copy) +/// let registry = Registry::builder() +/// .with_document("https://example.com/schema1", &schema1)? +/// .with_document("https://example.com/schema2", &schema2)? +/// .build()?; +/// # Ok(()) +/// # } +/// ``` +pub struct RegistryBuilder<'doc> { + documents: AHashMap>, (Cow<'doc, Value>, Draft)>, + retriever: Option>, +} + +impl<'doc> RegistryBuilder<'doc> { + /// Create a new empty builder. + #[must_use] + pub fn new() -> RegistryBuilder<'doc> { + RegistryBuilder { + documents: AHashMap::new(), + retriever: None, + } + } + + /// Add a document to the registry. + /// + /// This method accepts any type implementing [`IntoDocument`], which includes: + /// - `&'doc Value` - borrowed schema (auto-detect draft) + /// - `Value` - owned schema (auto-detect draft) + /// - `(&'doc Value, Draft)` - borrowed with explicit draft + /// - `(Value, Draft)` - owned with explicit draft + /// - `Resource` - existing resource type + /// + /// # Examples + /// + /// ```rust + /// use referencing::{Registry, Draft}; + /// use serde_json::json; + /// + /// # fn example() -> Result<(), Box> { + /// let schema = json!({"type": "string"}); + /// + /// let registry = Registry::builder() + /// // Borrowed, auto-detect + /// .with_document("https://example.com/a", &schema)? + /// // Owned, auto-detect + /// .with_document("https://example.com/b", json!({"type": "number"}))? + /// // Borrowed, explicit draft + /// .with_document("https://example.com/c", (&schema, Draft::Draft7))? + /// .build()?; + /// # Ok(()) + /// # } + /// ``` + /// + /// # Errors + /// + /// Returns an error if the URI is invalid. 
+ pub fn with_document( + mut self, + uri: &str, + doc: impl IntoDocument<'doc>, + ) -> Result, Error> { + let (cow, draft) = doc.into_document(); + let parsed_uri = uri::from_str(uri.trim_end_matches('#'))?; + self.documents.insert(Arc::new(parsed_uri), (cow, draft)); + Ok(self) + } + + /// Set the retriever for fetching external references. + /// + /// If a retriever is provided, the builder will recursively fetch all + /// `$ref` references found in the documents during `build()`. + /// + /// # Examples + /// + /// ```rust,no_run + /// use referencing::{Registry, DefaultRetriever}; + /// use serde_json::json; + /// + /// # fn example() -> Result<(), Box> { + /// let schema = json!({ + /// "properties": { + /// "name": {"$ref": "https://example.com/name-schema.json"} + /// } + /// }); + /// + /// let registry = Registry::builder() + /// .with_document("https://example.com/root", &schema)? + /// .with_retriever(DefaultRetriever) + /// .build()?; // Will fetch name-schema.json + /// # Ok(()) + /// # } + /// ``` + #[must_use] + pub fn with_retriever(mut self, retriever: impl crate::IntoRetriever) -> RegistryBuilder<'doc> { + self.retriever = Some(retriever.into_retriever()); + self + } + + /// Build the registry, fetching any external references if a retriever was provided. + /// + /// This consumes the builder and returns a fully constructed [`Registry`](crate::Registry). 
+ /// + /// # Errors + /// + /// Returns an error if: + /// - Any external reference cannot be retrieved + /// - Any URI is invalid + /// - Circular references are detected + pub fn build(self) -> Result, Error> { + use crate::DefaultRetriever; + + // Convert documents to the format expected by process_builder_documents + let pairs: Vec<_> = self.documents.into_iter().collect(); + + let draft = Draft::default(); // TODO: Allow configuring default draft + let retriever = self.retriever.unwrap_or_else(|| Arc::new(DefaultRetriever)); + build_registry_with_retriever(pairs, retriever, draft) + } +} + +impl Default for RegistryBuilder<'_> { + fn default() -> Self { + Self::new() + } +} + +#[cfg(feature = "retrieve-async")] +impl<'doc> RegistryBuilder<'doc> { + /// Set an async retriever for fetching external references. + /// + /// # Examples + /// + /// ```rust,no_run + /// use referencing::{Registry, Resource}; + /// use serde_json::json; + /// + /// # async fn example() -> Result<(), Box> { + /// let schema = json!({ + /// "properties": { + /// "name": {"$ref": "https://example.com/name-schema.json"} + /// } + /// }); + /// + /// struct MyRetriever; + /// + /// #[cfg_attr(target_family = "wasm", async_trait::async_trait(?Send))] + /// #[cfg_attr(not(target_family = "wasm"), async_trait::async_trait)] + /// impl referencing::AsyncRetrieve for MyRetriever { + /// async fn retrieve( + /// &self, + /// _uri: &referencing::Uri, + /// ) -> Result> { + /// Ok(json!({"type": "string"})) + /// } + /// } + /// + /// let registry = Registry::builder() + /// .with_document("https://example.com/root", &schema)? 
+ /// .with_async_retriever(MyRetriever) + /// .build_async().await?; + /// # Ok(()) + /// # } + /// ``` + #[must_use] + pub fn with_async_retriever( + self, + retriever: impl crate::IntoAsyncRetriever, + ) -> AsyncRegistryBuilder<'doc> { + AsyncRegistryBuilder { + documents: self.documents, + retriever: retriever.into_retriever(), + } + } +} + +/// Builder for creating a [`Registry`](crate::Registry) with async retrieval. +#[cfg(feature = "retrieve-async")] +pub struct AsyncRegistryBuilder<'doc> { + documents: AHashMap>, (Cow<'doc, Value>, Draft)>, + retriever: Arc, +} + +#[cfg(feature = "retrieve-async")] +impl<'doc> AsyncRegistryBuilder<'doc> { + /// Add a document to the registry. + /// + /// See [`RegistryBuilder::with_document`] for details. + pub fn with_document(mut self, uri: &str, doc: impl IntoDocument<'doc>) -> Result { + let (cow, draft) = doc.into_document(); + let parsed_uri = uri::from_str(uri.trim_end_matches('#'))?; + self.documents.insert(Arc::new(parsed_uri), (cow, draft)); + Ok(self) + } + + /// Build the registry asynchronously, fetching any external references. + /// + /// # Errors + /// + /// Returns an error if: + /// - Any external reference cannot be retrieved + /// - Any URI is invalid + /// - Circular references are detected + pub async fn build_async(self) -> Result, Error> { + let pairs: Vec<_> = self.documents.into_iter().collect(); + build_registry_with_async_retriever(pairs, self.retriever, Draft::default()).await + } +} + +/// Helper function to build a registry from documents with a retriever. +/// +/// This function: +/// 1. Stores initial documents in the registry +/// 2. Uses temporary resource/anchor maps to discover external references +/// 3. Recursively fetches all external references +/// 4. Stores all fetched documents in the registry +/// 5. 
Returns a Registry containing ONLY documents (resources/anchors computed later by `ResolutionContext`) +pub(crate) fn build_registry_with_retriever( + documents: DocumentVec<'_>, + retriever: Arc, + default_draft: Draft, +) -> Result, Error> { + use crate::{ + cache::UriCache, + registry::{ + create_resource, handle_fragment, handle_retrieve_error, ProcessingState, ResourceMap, + }, + resource::InnerResourcePtr, + }; + use ahash::AHashMap; + + let mut doc_store = AHashMap::new(); + let mut resolution_cache = UriCache::new(); + + // Temporary maps used ONLY for discovering what to fetch + // These will NOT be stored in the Registry - ResolutionContext will rebuild them + let mut resources = ResourceMap::new(); + let mut anchors = AHashMap::new(); + let mut state = ProcessingState::new(); + + // PHASE 1: Insert all initial documents into doc_store first + // We must complete all insertions BEFORE creating any pointers, otherwise + // HashMap reallocation will invalidate pointers + let mut initial_uris = Vec::new(); + + // Start with SPECIFICATIONS meta-schemas + for (uri, (cow, draft)) in crate::SPECIFICATIONS.documents() { + doc_store.insert(uri.clone(), (Cow::Borrowed(cow.as_ref()), *draft)); + initial_uris.push((uri.clone(), *draft)); + } + + // Add user-provided documents + for (uri, (cow, draft)) in documents { + use std::collections::hash_map::Entry; + + // Only insert if URI doesn't already exist (keep first occurrence) + if let Entry::Vacant(entry) = doc_store.entry(uri.clone()) { + entry.insert((cow, draft)); + initial_uris.push((uri, draft)); + } + // Skip duplicate - keep the first one (SPECIFICATIONS takes precedence) + } + + // PHASE 2: Now that doc_store is stable, create resource pointers + for (uri, draft) in initial_uris { + // Create temporary resource pointer for discovery + let stored_value = match &doc_store[&uri].0 { + std::borrow::Cow::Owned(v) => v as *const Value, + std::borrow::Cow::Borrowed(v) => *v as *const Value, + }; + + let 
resource_ptr = InnerResourcePtr::new(stored_value, draft); + resources.insert(uri.clone(), resource_ptr.clone()); + + // Track custom metaschemas + if draft == Draft::Unknown { + state.custom_metaschemas.push(uri.clone()); + } + + state.queue.push_back((uri, resource_ptr)); + } + + // Process queue and fetch external references + loop { + if state.queue.is_empty() && state.external.is_empty() { + break; + } + + // Process queue - this discovers external refs + crate::registry::process_queue( + &mut state, + &mut resources, + &mut anchors, + &mut resolution_cache, + )?; + + // Retrieve external resources + for (original, uri, kind, ref_draft) in state.external.drain() { + let mut fragmentless = uri.clone(); + fragmentless.set_fragment(None); + if !resources.contains_key(&fragmentless) { + let retrieved = match retriever.retrieve(&fragmentless) { + Ok(retrieved) => retrieved, + Err(error) => { + handle_retrieve_error(&uri, &original, &fragmentless, error, kind)?; + continue; + } + }; + + // Use the referencing document's draft for fetched resources + // This ensures remote documents are processed with the correct draft + let (key, resource) = create_resource( + retrieved, + fragmentless, + ref_draft, + &mut doc_store, + &mut resources, + &mut state.custom_metaschemas, + ); + handle_fragment(&uri, &resource, &key, ref_draft, &mut state.queue); + state.queue.push_back((key, resource)); + } + } + } + + // Validate custom metaschemas + crate::registry::validate_custom_metaschemas(&state.custom_metaschemas, &resources)?; + + // Return registry with ONLY documents + // Resources and anchors are temporary and discarded + // ResolutionContext will recompute them from documents + Ok(crate::Registry { + documents: doc_store, + resolution_cache: resolution_cache.into_shared(), + retriever, + #[cfg(feature = "retrieve-async")] + async_retriever: None, + draft: default_draft, + }) +} + +#[cfg(feature = "retrieve-async")] +#[allow(clippy::elidable_lifetime_names)] +pub(crate) 
async fn build_registry_with_async_retriever<'doc>( + documents: DocumentVec<'doc>, + retriever: Arc, + default_draft: Draft, +) -> Result, Error> { + use crate::{ + cache::UriCache, + registry::{ + create_resource, handle_fragment, handle_retrieve_error, ProcessingState, ResourceMap, + }, + resource::InnerResourcePtr, + }; + use ahash::AHashMap; + + let mut doc_store = AHashMap::new(); + let mut resolution_cache = UriCache::new(); + + let mut resources = ResourceMap::new(); + let mut anchors = AHashMap::new(); + let mut state = ProcessingState::new(); + let mut initial_uris = Vec::new(); + + for (uri, (cow, draft)) in crate::SPECIFICATIONS.documents() { + doc_store.insert(uri.clone(), (Cow::Borrowed(cow.as_ref()), *draft)); + initial_uris.push((uri.clone(), *draft)); + } + + for (uri, (cow, draft)) in documents { + use std::collections::hash_map::Entry; + if let Entry::Vacant(entry) = doc_store.entry(uri.clone()) { + entry.insert((cow, draft)); + initial_uris.push((uri, draft)); + } + } + + for (uri, draft) in initial_uris { + let stored_value = match &doc_store[&uri].0 { + std::borrow::Cow::Owned(v) => v as *const Value, + std::borrow::Cow::Borrowed(v) => *v as *const Value, + }; + + let resource_ptr = InnerResourcePtr::new(stored_value, draft); + resources.insert(uri.clone(), resource_ptr.clone()); + + if draft == Draft::Unknown { + state.custom_metaschemas.push(uri.clone()); + } + + state.queue.push_back((uri, resource_ptr)); + } + + loop { + if state.queue.is_empty() && state.external.is_empty() { + break; + } + + crate::registry::process_queue( + &mut state, + &mut resources, + &mut anchors, + &mut resolution_cache, + )?; + + for (original, uri, kind, ref_draft) in state.external.drain() { + let mut fragmentless = uri.clone(); + fragmentless.set_fragment(None); + if resources.contains_key(&fragmentless) { + continue; + } + + let retrieved = match retriever.retrieve(&fragmentless).await { + Ok(retrieved) => retrieved, + Err(error) => { + 
handle_retrieve_error(&uri, &original, &fragmentless, error, kind)?; + continue; + } + }; + + let (key, resource) = create_resource( + retrieved, + fragmentless, + ref_draft, + &mut doc_store, + &mut resources, + &mut state.custom_metaschemas, + ); + handle_fragment(&uri, &resource, &key, ref_draft, &mut state.queue); + state.queue.push_back((key, resource)); + } + } + + crate::registry::validate_custom_metaschemas(&state.custom_metaschemas, &resources)?; + + Ok(crate::Registry { + documents: doc_store, + resolution_cache: resolution_cache.into_shared(), + retriever: Arc::new(crate::DefaultRetriever), + #[cfg(feature = "retrieve-async")] + async_retriever: Some(retriever), + draft: default_draft, + }) +} diff --git a/crates/jsonschema-referencing/src/cache.rs b/crates/jsonschema-referencing/src/cache.rs index a7b3e1f6..d268d18d 100644 --- a/crates/jsonschema-referencing/src/cache.rs +++ b/crates/jsonschema-referencing/src/cache.rs @@ -140,10 +140,4 @@ impl SharedUriCache { Ok(inserted) } - - pub(crate) fn into_local(self) -> UriCache { - UriCache { - cache: self.cache.into_inner(), - } - } } diff --git a/crates/jsonschema-referencing/src/context.rs b/crates/jsonschema-referencing/src/context.rs new file mode 100644 index 00000000..ade9ab56 --- /dev/null +++ b/crates/jsonschema-referencing/src/context.rs @@ -0,0 +1,200 @@ +//! Resolution context for deriving resources and anchors from a registry. + +use crate::{ + anchors::{Anchor, AnchorKey, AnchorKeyRef}, + resource::{InnerResourcePtr, JsonSchemaResource}, + Draft, Error, Registry, Resolver, +}; +use ahash::AHashMap; +use fluent_uri::Uri; +use serde_json::Value; +use std::{collections::VecDeque, sync::Arc}; + +type ResourceMap = AHashMap>, InnerResourcePtr>; + +/// Resolution context providing a view over a registry with optional root document. +/// +/// This type computes resources and anchors from registry documents on creation. 
+/// When resolving, it includes both registry documents and an optional root document. +/// +/// # Architecture +/// +/// - Registry: Stores documents only (pure storage) +/// - `ResolutionContext`: Computes resources + anchors from all documents +/// - Resolver: Uses `ResolutionContext` for URI resolution +/// +/// # Lifetimes +/// +/// The `'doc` lifetime represents the lifetime of the documents in the registry. +#[derive(Debug)] +pub struct ResolutionContext<'doc> { + /// Reference to the registry this context was built from + registry: &'doc Registry<'doc>, + + /// ALL resources (from registry documents + root) + resources: ResourceMap, + + /// ALL anchors (from registry documents + root) + anchors: AHashMap, +} + +impl<'doc> ResolutionContext<'doc> { + /// Create a new resolution context from a registry. + /// + /// Computes resources and anchors from all registry documents. + /// + /// # Panics + /// + /// Panics if the registry documents cannot be converted into a valid context. + pub fn new(registry: &'doc Registry<'doc>) -> Self { + let mut context = Self { + registry, + resources: ResourceMap::new(), + anchors: AHashMap::new(), + }; + + let initial = registry.documents.iter().map(|(uri, (cow_value, draft))| { + let value_ptr = match cow_value { + std::borrow::Cow::Borrowed(v) => *v as *const _, + std::borrow::Cow::Owned(v) => v as *const _, + }; + ( + Arc::clone(uri), + InnerResourcePtr::new(value_ptr, *draft), + true, + ) + }); + context.extend_with_documents(initial); + + context + } + + /// Get a resource by URI. + pub(crate) fn get_resource(&self, uri: &Uri) -> Option<&InnerResourcePtr> { + self.resources.get(uri) + } + + /// Add a root document to this context. + /// + /// This is used during compilation to add the schema being validated as + /// a resolvable document without modifying the underlying registry. + /// + /// # Errors + /// + /// This method currently never fails; the `Result` is reserved for future diagnostics. 
+ pub fn with_root_document( + mut self, + uri: Uri, + schema: &'doc Value, + draft: Draft, + ) -> Result { + let uri_arc = Arc::new(uri); + + let resource_ptr = InnerResourcePtr::new(std::ptr::from_ref::(schema), draft); + self.extend_with_documents([(Arc::clone(&uri_arc), resource_ptr, true)]); + + Ok(self) + } + + /// Get the resolution cache from the underlying registry. + #[must_use] + pub(crate) fn resolution_cache(&self) -> &crate::cache::SharedUriCache { + &self.registry.resolution_cache + } + + /// Resolve a URI to the anchor. + pub(crate) fn anchor(&self, uri: &Uri, name: &str) -> Result<&Anchor, Error> { + // Check if anchor name contains invalid characters + if name.contains('/') { + return Err(Error::invalid_anchor(name)); + } + + let key = AnchorKeyRef::new(uri, name); + self.anchors + .get(key.borrow_dyn()) + .ok_or_else(|| Error::no_such_anchor(name)) + } + + /// Resolve a URI against a base. + /// + /// # Errors + /// + /// If the reference is invalid. + pub(crate) fn resolve_against( + &self, + base: &Uri<&str>, + uri: &str, + ) -> Result>, Error> { + self.resolution_cache().resolve_against(base, uri) + } + + /// Create a resolver with the given base URI. + /// + /// # Errors + /// + /// Returns an error if the base URI is invalid. + pub fn try_resolver(&self, base_uri: &str) -> Result, Error> { + let base = crate::uri::from_str(base_uri)?; + Ok(self.resolver(base)) + } + + /// Create a resolver with a known valid base URI. + #[must_use] + pub fn resolver(&self, base_uri: Uri) -> Resolver<'_> { + Resolver::new(self, Arc::new(base_uri)) + } + + fn extend_with_documents( + &mut self, + docs: impl IntoIterator>, InnerResourcePtr, bool)>, + ) { + let mut queue: VecDeque<(Arc>, InnerResourcePtr, bool)> = + docs.into_iter().collect(); + + while let Some((original_base, resource, is_top_level)) = queue.pop_front() { + // Register the resource under its original base only for top-level documents. 
+ if is_top_level { + self.resources + .insert(original_base.clone(), resource.clone()); + } + + let final_base = if let Some(id) = resource.id() { + match self + .registry + .resolution_cache + .resolve_against(&original_base.borrow(), id) + { + Ok(resolved) => { + self.resources.insert(resolved.clone(), resource.clone()); + resolved + } + Err(_) => original_base.clone(), + } + } else { + original_base.clone() + }; + + for anchor in resource.anchors() { + self.anchors + .entry(AnchorKey::new(final_base.clone(), anchor.name())) + .or_insert(anchor); + } + + if is_top_level && final_base != original_base { + for anchor in resource.anchors() { + self.anchors + .entry(AnchorKey::new(original_base.clone(), anchor.name())) + .or_insert(anchor); + } + } + + for subresource_contents in resource.draft().subresources_of(resource.contents()) { + let subresource = InnerResourcePtr::new(subresource_contents, resource.draft()); + queue.push_back((final_base.clone(), subresource.clone(), false)); + if is_top_level && final_base != original_base { + queue.push_back((original_base.clone(), subresource, false)); + } + } + } + } +} diff --git a/crates/jsonschema-referencing/src/lib.rs b/crates/jsonschema-referencing/src/lib.rs index 3e9f2ed8..aadd064d 100644 --- a/crates/jsonschema-referencing/src/lib.rs +++ b/crates/jsonschema-referencing/src/lib.rs @@ -2,7 +2,9 @@ //! //! An implementation-agnostic JSON reference resolution library for Rust. 
mod anchors; +mod builder; mod cache; +mod context; mod error; mod list; pub mod meta; @@ -16,12 +18,18 @@ pub mod uri; mod vocabularies; pub(crate) use anchors::Anchor; +pub use builder::RegistryBuilder; +pub use context::ResolutionContext; pub use error::{Error, UriError}; pub use fluent_uri::{Iri, IriRef, Uri, UriRef}; pub use list::List; -pub use registry::{parse_index, pointer, Registry, RegistryOptions, SPECIFICATIONS}; +#[cfg(feature = "retrieve-async")] +pub use registry::IntoAsyncRetriever; +pub use registry::{ + parse_index, pointer, IntoRetriever, Registry, RegistryOptions, SPECIFICATIONS, +}; pub use resolver::{Resolved, Resolver}; -pub use resource::{unescape_segment, Resource, ResourceRef}; +pub use resource::{unescape_segment, IntoDocument, Resource, ResourceRef}; pub use retriever::{DefaultRetriever, Retrieve}; pub(crate) use segments::Segments; pub use specification::Draft; diff --git a/crates/jsonschema-referencing/src/meta.rs b/crates/jsonschema-referencing/src/meta.rs index eab98f95..b4363623 100644 --- a/crates/jsonschema-referencing/src/meta.rs +++ b/crates/jsonschema-referencing/src/meta.rs @@ -4,8 +4,6 @@ use serde_json::Value; use std::sync::{Arc, LazyLock}; -use crate::Draft; - macro_rules! 
schema { ($vis:vis $name:ident, $path:expr) => { $vis static $name: LazyLock> = LazyLock::new(|| { @@ -146,97 +144,3 @@ pub(crate) static META_SCHEMAS_ALL: LazyLock<[(&'static str, &'static Value); 18 ), ] }); - -pub(crate) static META_SCHEMAS_DRAFT4: LazyLock<[(&'static str, &'static Value); 1]> = - LazyLock::new(|| [("http://json-schema.org/draft-04/schema#", &*DRAFT4)]); - -pub(crate) static META_SCHEMAS_DRAFT6: LazyLock<[(&'static str, &'static Value); 1]> = - LazyLock::new(|| [("http://json-schema.org/draft-06/schema#", &*DRAFT6)]); - -pub(crate) static META_SCHEMAS_DRAFT7: LazyLock<[(&'static str, &'static Value); 1]> = - LazyLock::new(|| [("http://json-schema.org/draft-07/schema#", &*DRAFT7)]); - -pub(crate) static META_SCHEMAS_DRAFT2019: LazyLock<[(&'static str, &'static Value); 7]> = - LazyLock::new(|| { - [ - ( - "https://json-schema.org/draft/2019-09/schema", - &*DRAFT201909, - ), - ( - "https://json-schema.org/draft/2019-09/meta/applicator", - &*DRAFT201909_APPLICATOR, - ), - ( - "https://json-schema.org/draft/2019-09/meta/content", - &*DRAFT201909_CONTENT, - ), - ( - "https://json-schema.org/draft/2019-09/meta/core", - &*DRAFT201909_CORE, - ), - ( - "https://json-schema.org/draft/2019-09/meta/format", - &*DRAFT201909_FORMAT, - ), - ( - "https://json-schema.org/draft/2019-09/meta/meta-data", - &*DRAFT201909_META_DATA, - ), - ( - "https://json-schema.org/draft/2019-09/meta/validation", - &*DRAFT201909_VALIDATION, - ), - ] - }); - -pub(crate) static META_SCHEMAS_DRAFT2020: LazyLock<[(&'static str, &'static Value); 8]> = - LazyLock::new(|| { - [ - ( - "https://json-schema.org/draft/2020-12/schema", - &*DRAFT202012, - ), - ( - "https://json-schema.org/draft/2020-12/meta/core", - &*DRAFT202012_CORE, - ), - ( - "https://json-schema.org/draft/2020-12/meta/applicator", - &*DRAFT202012_APPLICATOR, - ), - ( - "https://json-schema.org/draft/2020-12/meta/unevaluated", - &*DRAFT202012_UNEVALUATED, - ), - ( - "https://json-schema.org/draft/2020-12/meta/validation", - 
&*DRAFT202012_VALIDATION, - ), - ( - "https://json-schema.org/draft/2020-12/meta/meta-data", - &*DRAFT202012_META_DATA, - ), - ( - "https://json-schema.org/draft/2020-12/meta/format-annotation", - &*DRAFT202012_FORMAT_ANNOTATION, - ), - ( - "https://json-schema.org/draft/2020-12/meta/content", - &*DRAFT202012_CONTENT, - ), - ] - }); - -/// Return all the meta-schemas which are part of a given draft. -pub(crate) fn metas_for_draft(draft: Draft) -> &'static [(&'static str, &'static Value)] { - match draft { - Draft::Draft4 => &*META_SCHEMAS_DRAFT4, - Draft::Draft6 => &*META_SCHEMAS_DRAFT6, - Draft::Draft7 => &*META_SCHEMAS_DRAFT7, - Draft::Draft201909 => &*META_SCHEMAS_DRAFT2019, - // Unknown drafts default to 2020-12 vocabularies. - // Custom meta-schemas should explicitly declare vocabularies in their $vocabulary field. - Draft::Draft202012 | Draft::Unknown => &*META_SCHEMAS_DRAFT2020, - } -} diff --git a/crates/jsonschema-referencing/src/registry.rs b/crates/jsonschema-referencing/src/registry.rs index 48638cc6..5422dd87 100644 --- a/crates/jsonschema-referencing/src/registry.rs +++ b/crates/jsonschema-referencing/src/registry.rs @@ -1,48 +1,66 @@ -use std::{ - collections::{hash_map::Entry, VecDeque}, - num::NonZeroUsize, - pin::Pin, - sync::{Arc, LazyLock}, -}; +#[cfg(not(target_family = "wasm"))] +use std::sync::LazyLock; +use std::{borrow::Cow, collections::VecDeque, num::NonZeroUsize, sync::Arc}; use ahash::{AHashMap, AHashSet}; use fluent_uri::Uri; use serde_json::Value; use crate::{ - anchors::{AnchorKey, AnchorKeyRef}, + anchors::AnchorKey, cache::{SharedUriCache, UriCache}, - meta::{self, metas_for_draft}, - resource::{unescape_segment, InnerResourcePtr, JsonSchemaResource}, + meta, + resource::{unescape_segment, InnerResourcePtr, IntoDocument, JsonSchemaResource}, uri, vocabularies::{self, VocabularySet}, - Anchor, DefaultRetriever, Draft, Error, Resolver, Resource, ResourceRef, Retrieve, + Anchor, DefaultRetriever, Draft, Error, Resource, Retrieve, }; 
-/// An owned-or-refstatic wrapper for JSON `Value`. -#[derive(Debug)] -pub(crate) enum ValueWrapper { - Owned(Value), - StaticRef(&'static Value), +type DocumentStore<'doc> = AHashMap>, (std::borrow::Cow<'doc, Value>, Draft)>; +pub(crate) type DocumentEntry<'doc> = (Arc>, (Cow<'doc, Value>, Draft)); +pub(crate) type DocumentVec<'doc> = Vec>; +pub(crate) type ResourceMap = AHashMap>, InnerResourcePtr>; + +/// Pre-loaded registry containing all JSON Schema meta-schemas and their vocabularies. +pub static SPECIFICATIONS: Specifications = Specifications; + +pub struct Specifications; + +#[cfg(not(target_family = "wasm"))] +static SPECIFICATIONS_STORAGE: LazyLock> = + LazyLock::new(|| Registry::build_from_meta_schemas(meta::META_SCHEMAS_ALL.as_slice())); + +#[cfg(target_family = "wasm")] +thread_local! { + static SPECIFICATIONS_STORAGE: std::cell::OnceCell<&'static Registry<'static>> = std::cell::OnceCell::new(); } -impl AsRef for ValueWrapper { - fn as_ref(&self) -> &Value { - match self { - ValueWrapper::Owned(value) => value, - ValueWrapper::StaticRef(value) => value, +impl Specifications { + fn get() -> &'static Registry<'static> { + #[cfg(not(target_family = "wasm"))] + { + &SPECIFICATIONS_STORAGE + } + #[cfg(target_family = "wasm")] + { + SPECIFICATIONS_STORAGE.with(|cell| { + cell.get_or_init(move || { + Box::leak(Box::new(Registry::build_from_meta_schemas( + meta::META_SCHEMAS_ALL.as_slice(), + ))) + }) + }) } } } -// SAFETY: `Pin` guarantees stable memory locations for resource pointers, -// while `Arc` enables cheap sharing between multiple registries -type DocumentStore = AHashMap>, Pin>>; -type ResourceMap = AHashMap>, InnerResourcePtr>; +impl std::ops::Deref for Specifications { + type Target = Registry<'static>; -/// Pre-loaded registry containing all JSON Schema meta-schemas and their vocabularies -pub static SPECIFICATIONS: LazyLock = - LazyLock::new(|| Registry::build_from_meta_schemas(meta::META_SCHEMAS_ALL.as_slice())); + fn deref(&self) -> 
&Self::Target { + Self::get() + } +} /// A registry of JSON Schema resources, each identified by their canonical URIs. /// @@ -140,21 +158,53 @@ pub static SPECIFICATIONS: LazyLock = /// - Handle nested references /// - Process JSON Schema anchors /// -#[derive(Debug)] -pub struct Registry { - documents: DocumentStore, - pub(crate) resources: ResourceMap, - anchors: AHashMap, - resolution_cache: SharedUriCache, +/// Registry stores JSON Schema documents. +/// +/// Pure storage - contains only documents. Derived data (resources, anchors) +/// are computed by `ResolutionContext`. +pub struct Registry<'doc> { + pub(crate) documents: DocumentStore<'doc>, + pub(crate) resolution_cache: SharedUriCache, + pub(crate) retriever: Arc, + #[cfg(feature = "retrieve-async")] + pub(crate) async_retriever: Option>, + pub(crate) draft: Draft, } -impl Clone for Registry { +impl std::fmt::Debug for Registry<'_> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("Registry") + .field("documents", &self.documents) + .field("resolution_cache", &self.resolution_cache) + .field("retriever", &"") + .field("async_retriever", &{ + #[cfg(feature = "retrieve-async")] + { + if self.async_retriever.is_some() { + Some("") + } else { + None + } + } + #[cfg(not(feature = "retrieve-async"))] + { + Option::<&str>::None + } + }) + .field("draft", &self.draft) + .finish() + } +} + +impl Clone for Registry<'_> { fn clone(&self) -> Self { Self { documents: self.documents.clone(), - resources: self.resources.clone(), - anchors: self.anchors.clone(), resolution_cache: self.resolution_cache.clone(), + retriever: Arc::clone(&self.retriever), + #[cfg(feature = "retrieve-async")] + async_retriever: self.async_retriever.as_ref().map(Arc::clone), + draft: self.draft, } } } @@ -211,8 +261,8 @@ impl RegistryOptions> { pub fn build( self, pairs: impl IntoIterator, Resource)>, - ) -> Result { - Registry::try_from_resources_impl(pairs, &*self.retriever, self.draft) + ) -> Result, 
Error> { + Registry::try_from_resources_with_retriever(pairs, self.retriever, self.draft) } } @@ -228,8 +278,9 @@ impl RegistryOptions> { pub async fn build( self, pairs: impl IntoIterator, Resource)>, - ) -> Result { - Registry::try_from_resources_async_impl(pairs, &*self.retriever, self.draft).await + ) -> Result, Error> { + Registry::try_from_resources_async_impl(pairs, Arc::clone(&self.retriever), self.draft) + .await } } @@ -274,7 +325,48 @@ impl Default for RegistryOptions> { } } -impl Registry { +impl Registry<'static> { + /// Create a new [`RegistryBuilder`](crate::RegistryBuilder) for constructing a registry. + /// + /// This is the recommended way to create new registries with the builder pattern. + /// + /// # Examples + /// + /// ```rust + /// use referencing::Registry; + /// use serde_json::json; + /// + /// # fn example() -> Result<(), Box> { + /// let schema = json!({"type": "string"}); + /// + /// let registry = Registry::builder() + /// .with_document("https://example.com/schema", &schema)? + /// .build()?; + /// # Ok(()) + /// # } + /// ``` + #[must_use] + pub fn builder<'doc>() -> crate::RegistryBuilder<'doc> { + crate::RegistryBuilder::new() + } +} + +impl<'doc> Registry<'doc> { + /// Get an iterator over the documents in this registry. + /// + /// Returns an iterator of `(&Uri, &(Cow, Draft))` pairs. + #[must_use = "iterating documents is side-effect free; consume the iterator to observe data"] + pub fn documents( + &self, + ) -> impl Iterator< + Item = ( + &Arc>, + &(std::borrow::Cow<'doc, Value>, Draft), + ), + > { + self.documents.iter() + } + /// Get [`RegistryOptions`] for configuring a new [`Registry`]. #[must_use] pub fn options() -> RegistryOptions> { @@ -290,8 +382,14 @@ impl Registry { /// # Errors /// /// Returns an error if the URI is invalid or if there's an issue processing the resource. 
- pub fn try_new(uri: impl AsRef, resource: Resource) -> Result { - Self::try_new_impl(uri, resource, &DefaultRetriever, Draft::default()) + pub fn try_new(uri: S, document: D) -> Result + where + S: AsRef, + D: IntoDocument<'doc>, + { + Registry::builder() + .with_document(uri.as_ref(), document)? + .build() } /// Create a new [`Registry`] from an iterator of (URI, Resource) pairs. /// @@ -302,47 +400,34 @@ impl Registry { /// # Errors /// /// Returns an error if any URI is invalid or if there's an issue processing the resources. - pub fn try_from_resources( - pairs: impl IntoIterator, Resource)>, - ) -> Result { - Self::try_from_resources_impl(pairs, &DefaultRetriever, Draft::default()) - } - fn try_new_impl( - uri: impl AsRef, - resource: Resource, - retriever: &dyn Retrieve, + pub fn try_from_resources(pairs: I) -> Result + where + I: IntoIterator, + S: AsRef, + D: IntoDocument<'doc>, + { + Self::try_from_resources_impl(pairs, Draft::default()) + } + fn try_from_resources_impl(pairs: I, draft: Draft) -> Result + where + I: IntoIterator, + S: AsRef, + D: IntoDocument<'doc>, + { + Self::try_from_documents_with_retriever(pairs, Arc::new(DefaultRetriever), draft) + } + + fn try_from_resources_with_retriever( + pairs: I, + retriever: Arc, draft: Draft, - ) -> Result { - Self::try_from_resources_impl([(uri, resource)], retriever, draft) - } - fn try_from_resources_impl( - pairs: impl IntoIterator, Resource)>, - retriever: &dyn Retrieve, - draft: Draft, - ) -> Result { - let mut documents = AHashMap::new(); - let mut resources = ResourceMap::new(); - let mut anchors = AHashMap::new(); - let mut resolution_cache = UriCache::new(); - let custom_metaschemas = process_resources( - pairs, - retriever, - &mut documents, - &mut resources, - &mut anchors, - &mut resolution_cache, - draft, - )?; - - // Validate that all custom $schema references are registered - validate_custom_metaschemas(&custom_metaschemas, &resources)?; - - Ok(Registry { - documents, - resources, - 
anchors, - resolution_cache: resolution_cache.into_shared(), - }) + ) -> Result + where + I: IntoIterator, + S: AsRef, + D: IntoDocument<'doc>, + { + Self::try_from_documents_with_retriever(pairs, retriever, draft) } /// Create a new [`Registry`] from an iterator of (URI, Resource) pairs using an async retriever. /// @@ -354,159 +439,170 @@ impl Registry { /// /// Returns an error if any URI is invalid or if there's an issue processing the resources. #[cfg(feature = "retrieve-async")] - async fn try_from_resources_async_impl( - pairs: impl IntoIterator, Resource)>, - retriever: &dyn crate::AsyncRetrieve, + async fn try_from_resources_async_impl( + pairs: I, + retriever: Arc, draft: Draft, - ) -> Result { - let mut documents = AHashMap::new(); - let mut resources = ResourceMap::new(); - let mut anchors = AHashMap::new(); - let mut resolution_cache = UriCache::new(); - - let custom_metaschemas = process_resources_async( - pairs, - retriever, - &mut documents, - &mut resources, - &mut anchors, - &mut resolution_cache, - draft, - ) - .await?; - - // Validate that all custom $schema references are registered - validate_custom_metaschemas(&custom_metaschemas, &resources)?; + ) -> Result + where + I: IntoIterator, + S: AsRef, + D: IntoDocument<'doc>, + { + Self::try_from_documents_async_impl(pairs, retriever, draft).await + } + + fn try_from_documents_with_retriever( + pairs: I, + retriever: Arc, + draft: Draft, + ) -> Result + where + I: IntoIterator, + S: AsRef, + D: IntoDocument<'doc>, + { + let converted = collect_documents(pairs)?; + crate::builder::build_registry_with_retriever(converted, retriever, draft) + } - Ok(Registry { - documents, - resources, - anchors, - resolution_cache: resolution_cache.into_shared(), - }) + #[cfg(feature = "retrieve-async")] + async fn try_from_documents_async_impl( + pairs: I, + retriever: Arc, + draft: Draft, + ) -> Result + where + I: IntoIterator, + S: AsRef, + D: IntoDocument<'doc>, + { + let converted = collect_documents(pairs)?; 
+ crate::builder::build_registry_with_async_retriever(converted, retriever, draft).await + } + /// Create a resolution context for this registry. + /// + /// The context can be used to create resolvers via `context.try_resolver()` or to add + /// a root document for compilation via `context.with_root_document()`. + #[must_use] + pub fn context(&self) -> crate::ResolutionContext<'_> { + crate::ResolutionContext::new(self) } - /// Create a new registry with a new resource. + + // Note: We cannot provide a try_resolver convenience method because + // the Resolver needs to hold a reference to the ResolutionContext. + // Users must call registry.context().try_resolver(base_uri) instead. + + /// Create a new registry with an additional resource. + /// + /// This consumes the current registry and returns a new one with the resource added. /// /// # Errors /// - /// Returns an error if the URI is invalid or if there's an issue processing the resource. - pub fn try_with_resource( - self, - uri: impl AsRef, - resource: Resource, - ) -> Result { - let draft = resource.draft(); - self.try_with_resources([(uri, resource)], draft) + /// Returns an error if the URI is invalid or there's an issue processing the resource. + pub fn try_with_resource(self, uri: S, document: D) -> Result + where + S: AsRef, + D: IntoDocument<'doc>, + { + self.try_with_resources([(uri, document)]) } - /// Create a new registry with new resources. + + /// Create a new registry with multiple additional resources. + /// + /// This consumes the current registry and returns a new one with the resources added. /// /// # Errors /// - /// Returns an error if any URI is invalid or if there's an issue processing the resources. - pub fn try_with_resources( - self, - pairs: impl IntoIterator, Resource)>, - draft: Draft, - ) -> Result { - self.try_with_resources_and_retriever(pairs, &DefaultRetriever, draft) + /// Returns an error if any URI is invalid or there's an issue processing the resources. 
+ pub fn try_with_resources(self, pairs: I) -> Result + where + I: IntoIterator, + S: AsRef, + D: IntoDocument<'doc>, + { + self.rebuild_with_resources(pairs) } - /// Create a new registry with new resources and using the given retriever. + + /// Create a new registry with an additional resource using async retrieval. /// /// # Errors /// - /// Returns an error if any URI is invalid or if there's an issue processing the resources. - pub fn try_with_resources_and_retriever( - self, - pairs: impl IntoIterator, Resource)>, - retriever: &dyn Retrieve, - draft: Draft, - ) -> Result { - let mut documents = self.documents; - let mut resources = self.resources; - let mut anchors = self.anchors; - let mut resolution_cache = self.resolution_cache.into_local(); - let custom_metaschemas = process_resources( - pairs, - retriever, - &mut documents, - &mut resources, - &mut anchors, - &mut resolution_cache, - draft, - )?; - validate_custom_metaschemas(&custom_metaschemas, &resources)?; - Ok(Registry { - documents, - resources, - anchors, - resolution_cache: resolution_cache.into_shared(), - }) + /// Returns an error if the URI is invalid or retrieving referenced resources fails. + #[cfg(feature = "retrieve-async")] + pub async fn try_with_resource_async(self, uri: S, document: D) -> Result + where + S: AsRef, + D: IntoDocument<'doc>, + { + self.try_with_resources_async([(uri, document)]).await } - /// Create a new registry with new resources and using the given non-blocking retriever. + + /// Create a new registry with multiple resources using async retrieval. /// /// # Errors /// - /// Returns an error if any URI is invalid or if there's an issue processing the resources. + /// Returns an error if any URI is invalid or fetching their references fails. 
#[cfg(feature = "retrieve-async")] - pub async fn try_with_resources_and_retriever_async( - self, - pairs: impl IntoIterator, Resource)>, - retriever: &dyn crate::AsyncRetrieve, - draft: Draft, - ) -> Result { - let mut documents = self.documents; - let mut resources = self.resources; - let mut anchors = self.anchors; - let mut resolution_cache = self.resolution_cache.into_local(); - let custom_metaschemas = process_resources_async( - pairs, + pub async fn try_with_resources_async(self, pairs: I) -> Result + where + I: IntoIterator, + S: AsRef, + D: IntoDocument<'doc>, + { + self.rebuild_with_resources_async(pairs).await + } + + fn rebuild_with_resources(self, pairs: I) -> Result + where + I: IntoIterator, + S: AsRef, + D: IntoDocument<'doc>, + { + let Registry { + documents, + resolution_cache: _, retriever, - &mut documents, - &mut resources, - &mut anchors, - &mut resolution_cache, + #[cfg(feature = "retrieve-async")] + async_retriever: _, draft, - ) - .await?; - validate_custom_metaschemas(&custom_metaschemas, &resources)?; - Ok(Registry { - documents, - resources, - anchors, - resolution_cache: resolution_cache.into_shared(), - }) - } - /// Create a new [`Resolver`] for this registry with the given base URI. - /// - /// # Errors - /// - /// Returns an error if the base URI is invalid. - pub fn try_resolver(&self, base_uri: &str) -> Result, Error> { - let base = uri::from_str(base_uri)?; - Ok(self.resolver(base)) - } - /// Create a new [`Resolver`] for this registry with a known valid base URI. 
- #[must_use] - pub fn resolver(&self, base_uri: Uri) -> Resolver<'_> { - Resolver::new(self, Arc::new(base_uri)) + } = self; + + let mut all_documents: Vec<_> = documents.into_iter().collect(); + let new_documents = collect_documents(pairs)?; + all_documents.extend(new_documents); + + crate::builder::build_registry_with_retriever(all_documents, retriever, draft) } - pub(crate) fn anchor<'a>(&self, uri: &'a Uri, name: &'a str) -> Result<&Anchor, Error> { - let key = AnchorKeyRef::new(uri, name); - if let Some(value) = self.anchors.get(key.borrow_dyn()) { - return Ok(value); - } - let resource = &self.resources[uri]; - if let Some(id) = resource.id() { - let uri = uri::from_str(id)?; - let key = AnchorKeyRef::new(&uri, name); - if let Some(value) = self.anchors.get(key.borrow_dyn()) { - return Ok(value); - } - } - if name.contains('/') { - Err(Error::invalid_anchor(name.to_string())) + + #[cfg(feature = "retrieve-async")] + async fn rebuild_with_resources_async(self, pairs: I) -> Result + where + I: IntoIterator, + S: AsRef, + D: IntoDocument<'doc>, + { + let Registry { + documents, + resolution_cache: _, + retriever, + async_retriever, + draft, + } = self; + + let mut all_documents: Vec<_> = documents.into_iter().collect(); + let new_documents = collect_documents(pairs)?; + all_documents.extend(new_documents); + + if let Some(async_retriever) = async_retriever { + crate::builder::build_registry_with_async_retriever( + all_documents, + async_retriever, + draft, + ) + .await } else { - Err(Error::no_such_anchor(name.to_string())) + crate::builder::build_registry_with_retriever(all_documents, retriever, draft) } } /// Resolves a reference URI against a base URI using registry's cache. 
@@ -534,12 +630,11 @@ impl Registry { { if let Ok(mut uri) = uri::from_str(specification) { // Remove fragment for lookup (e.g., "http://example.com/schema#" -> "http://example.com/schema") - // Resources are stored without fragments, so we must strip it to find the meta-schema + // Documents are stored without fragments, so we must strip it to find the meta-schema uri.set_fragment(None); - if let Some(resource) = self.resources.get(&uri) { + if let Some((doc_value, _)) = self.documents.get(&uri) { // Found the custom meta-schema - extract vocabularies - if let Ok(Some(vocabularies)) = vocabularies::find(resource.contents()) - { + if let Ok(Some(vocabularies)) = vocabularies::find(doc_value.as_ref()) { return vocabularies; } } @@ -557,80 +652,29 @@ impl Registry { /// Build a registry with all the given meta-schemas from specs. pub(crate) fn build_from_meta_schemas(schemas: &[(&'static str, &'static Value)]) -> Self { let schemas_count = schemas.len(); - let pairs = schemas - .iter() - .map(|(uri, schema)| (uri, ResourceRef::from_contents(schema))); - let mut documents = DocumentStore::with_capacity(schemas_count); - let mut resources = ResourceMap::with_capacity(schemas_count); - - // The actual number of anchors and cache-entries varies across - // drafts. We overshoot here to avoid reallocations, using the sum - // over all specifications. 
- let mut anchors = AHashMap::with_capacity(8); - let mut resolution_cache = UriCache::with_capacity(35); - - process_meta_schemas( - pairs, - &mut documents, - &mut resources, - &mut anchors, - &mut resolution_cache, - ) - .expect("Failed to process meta schemas"); + let resolution_cache = UriCache::with_capacity(35); + + for (uri_str, schema) in schemas { + let uri = + uri::from_str(uri_str.trim_end_matches('#')).expect("Invalid URI in meta-schema"); + let draft = Draft::default().detect(schema); + documents.insert(Arc::new(uri), (std::borrow::Cow::Borrowed(*schema), draft)); + } Self { documents, - resources, - anchors, resolution_cache: resolution_cache.into_shared(), + retriever: Arc::new(DefaultRetriever), + #[cfg(feature = "retrieve-async")] + async_retriever: None, + draft: Draft::default(), } } } -fn process_meta_schemas( - pairs: impl IntoIterator, ResourceRef<'static>)>, - documents: &mut DocumentStore, - resources: &mut ResourceMap, - anchors: &mut AHashMap, - resolution_cache: &mut UriCache, -) -> Result<(), Error> { - let mut queue = VecDeque::with_capacity(32); - - for (uri, resource) in pairs { - let uri = uri::from_str(uri.as_ref().trim_end_matches('#'))?; - let key = Arc::new(uri); - let contents: &'static Value = resource.contents(); - let wrapped_value = Arc::pin(ValueWrapper::StaticRef(contents)); - let resource = InnerResourcePtr::new((*wrapped_value).as_ref(), resource.draft()); - documents.insert(Arc::clone(&key), wrapped_value); - resources.insert(Arc::clone(&key), resource.clone()); - queue.push_back((key, resource)); - } - - // Process current queue and collect references to external resources - while let Some((mut base, resource)) = queue.pop_front() { - if let Some(id) = resource.id() { - base = resolution_cache.resolve_against(&base.borrow(), id)?; - resources.insert(base.clone(), resource.clone()); - } - - // Look for anchors - for anchor in resource.anchors() { - anchors.insert(AnchorKey::new(base.clone(), anchor.name()), anchor); - 
} - - // Process subresources - for contents in resource.draft().subresources_of(resource.contents()) { - let subresource = InnerResourcePtr::new(contents, resource.draft()); - queue.push_back((base.clone(), subresource)); - } - } - Ok(()) -} - #[derive(Hash, Eq, PartialEq)] -struct ReferenceKey { +pub(crate) struct ReferenceKey { base_ptr: NonZeroUsize, reference: String, } @@ -648,22 +692,22 @@ impl ReferenceKey { type ReferenceTracker = AHashSet; #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] -enum ReferenceKind { +pub(crate) enum ReferenceKind { Ref, Schema, } -struct ProcessingState { - queue: VecDeque<(Arc>, InnerResourcePtr)>, - seen: ReferenceTracker, - external: AHashSet<(String, Uri, ReferenceKind)>, - scratch: String, - refers_metaschemas: bool, - custom_metaschemas: Vec>>, +pub(crate) struct ProcessingState { + pub(crate) queue: VecDeque<(Arc>, InnerResourcePtr)>, + pub(crate) seen: ReferenceTracker, + pub(crate) external: AHashSet<(String, Uri, ReferenceKind, Draft)>, + pub(crate) scratch: String, + pub(crate) refers_metaschemas: bool, + pub(crate) custom_metaschemas: Vec>>, } impl ProcessingState { - fn new() -> Self { + pub(crate) fn new() -> Self { Self { queue: VecDeque::with_capacity(32), seen: ReferenceTracker::new(), @@ -675,37 +719,7 @@ impl ProcessingState { } } -fn process_input_resources( - pairs: impl IntoIterator, Resource)>, - documents: &mut DocumentStore, - resources: &mut ResourceMap, - state: &mut ProcessingState, -) -> Result<(), Error> { - for (uri, resource) in pairs { - let uri = uri::from_str(uri.as_ref().trim_end_matches('#'))?; - let key = Arc::new(uri); - match documents.entry(Arc::clone(&key)) { - Entry::Occupied(_) => {} - Entry::Vacant(entry) => { - let (draft, contents) = resource.into_inner(); - let wrapped_value = Arc::pin(ValueWrapper::Owned(contents)); - let resource = InnerResourcePtr::new((*wrapped_value).as_ref(), draft); - resources.insert(Arc::clone(&key), resource.clone()); - - // Track resources with custom 
meta-schemas for later validation - if draft == Draft::Unknown { - state.custom_metaschemas.push(Arc::clone(&key)); - } - - state.queue.push_back((key, resource)); - entry.insert(wrapped_value); - } - } - } - Ok(()) -} - -fn process_queue( +pub(crate) fn process_queue( state: &mut ProcessingState, resources: &mut ResourceMap, anchors: &mut AHashMap, @@ -724,6 +738,7 @@ fn process_queue( collect_external_resources( &base, resource.contents(), + resource.draft(), &mut state.external, &mut state.seen, resolution_cache, @@ -739,7 +754,7 @@ fn process_queue( Ok(()) } -fn handle_fragment( +pub(crate) fn handle_fragment( uri: &Uri, resource: &InnerResourcePtr, key: &Arc>, @@ -756,39 +771,32 @@ fn handle_fragment( } } -fn handle_metaschemas( - refers_metaschemas: bool, - resources: &mut ResourceMap, - anchors: &mut AHashMap, - draft_version: Draft, -) { - if refers_metaschemas { - let schemas = metas_for_draft(draft_version); - let draft_registry = Registry::build_from_meta_schemas(schemas); - resources.reserve(draft_registry.resources.len()); - for (key, resource) in draft_registry.resources { - resources.insert(key, resource.clone()); - } - anchors.reserve(draft_registry.anchors.len()); - for (key, anchor) in draft_registry.anchors { - anchors.insert(key, anchor); - } - } -} +// Removed: handle_metaschemas is no longer needed since resources/anchors +// are computed by ResolutionContext, not stored in Registry -fn create_resource( +pub(crate) fn create_resource( retrieved: Value, fragmentless: Uri, default_draft: Draft, - documents: &mut DocumentStore, + documents: &mut DocumentStore<'_>, resources: &mut ResourceMap, custom_metaschemas: &mut Vec>>, ) -> (Arc>, InnerResourcePtr) { let draft = default_draft.detect(&retrieved); - let wrapped_value = Arc::pin(ValueWrapper::Owned(retrieved)); - let resource = InnerResourcePtr::new((*wrapped_value).as_ref(), draft); let key = Arc::new(fragmentless); - documents.insert(Arc::clone(&key), wrapped_value); + + // Store as 
Cow::Owned first + documents.insert( + Arc::clone(&key), + (std::borrow::Cow::Owned(retrieved), draft), + ); + + // Get pointer to the stored value + let stored_value = match &documents[&key].0 { + std::borrow::Cow::Owned(v) => v as *const Value, + std::borrow::Cow::Borrowed(v) => *v as *const Value, + }; + let resource = InnerResourcePtr::new(stored_value, draft); resources.insert(Arc::clone(&key), resource.clone()); // Track resources with custom meta-schemas for later validation @@ -799,128 +807,24 @@ fn create_resource( (key, resource) } -fn process_resources( - pairs: impl IntoIterator, Resource)>, - retriever: &dyn Retrieve, - documents: &mut DocumentStore, - resources: &mut ResourceMap, - anchors: &mut AHashMap, - resolution_cache: &mut UriCache, - default_draft: Draft, -) -> Result>>, Error> { - let mut state = ProcessingState::new(); - process_input_resources(pairs, documents, resources, &mut state)?; - - loop { - if state.queue.is_empty() && state.external.is_empty() { - break; - } - - process_queue(&mut state, resources, anchors, resolution_cache)?; - - // Retrieve external resources - for (original, uri, kind) in state.external.drain() { - let mut fragmentless = uri.clone(); - fragmentless.set_fragment(None); - if !resources.contains_key(&fragmentless) { - let retrieved = match retriever.retrieve(&fragmentless) { - Ok(retrieved) => retrieved, - Err(error) => { - handle_retrieve_error(&uri, &original, &fragmentless, error, kind)?; - continue; - } - }; - - let (key, resource) = create_resource( - retrieved, - fragmentless, - default_draft, - documents, - resources, - &mut state.custom_metaschemas, - ); - handle_fragment(&uri, &resource, &key, default_draft, &mut state.queue); - state.queue.push_back((key, resource)); - } - } - } - - handle_metaschemas(state.refers_metaschemas, resources, anchors, default_draft); - - Ok(state.custom_metaschemas) -} - -#[cfg(feature = "retrieve-async")] -async fn process_resources_async( - pairs: impl IntoIterator, 
Resource)>, - retriever: &dyn crate::AsyncRetrieve, - documents: &mut DocumentStore, - resources: &mut ResourceMap, - anchors: &mut AHashMap, - resolution_cache: &mut UriCache, - default_draft: Draft, -) -> Result>>, Error> { - let mut state = ProcessingState::new(); - process_input_resources(pairs, documents, resources, &mut state)?; - - loop { - if state.queue.is_empty() && state.external.is_empty() { - break; - } - - process_queue(&mut state, resources, anchors, resolution_cache)?; - - if !state.external.is_empty() { - let data = state - .external - .drain() - .filter_map(|(original, uri, kind)| { - let mut fragmentless = uri.clone(); - fragmentless.set_fragment(None); - if resources.contains_key(&fragmentless) { - None - } else { - Some((original, uri, kind, fragmentless)) - } - }) - .collect::>(); - - let results = { - let futures = data - .iter() - .map(|(_, _, _, fragmentless)| retriever.retrieve(fragmentless)); - futures::future::join_all(futures).await - }; - - for ((original, uri, kind, fragmentless), result) in data.iter().zip(results) { - let retrieved = match result { - Ok(retrieved) => retrieved, - Err(error) => { - handle_retrieve_error(uri, original, fragmentless, error, *kind)?; - continue; - } - }; - - let (key, resource) = create_resource( - retrieved, - fragmentless.clone(), - default_draft, - documents, - resources, - &mut state.custom_metaschemas, - ); - handle_fragment(uri, &resource, &key, default_draft, &mut state.queue); - state.queue.push_back((key, resource)); - } - } - } - - handle_metaschemas(state.refers_metaschemas, resources, anchors, default_draft); - - Ok(state.custom_metaschemas) +fn collect_documents<'doc, I, S, D>(pairs: I) -> Result, Error> +where + I: IntoIterator, + S: AsRef, + D: IntoDocument<'doc>, +{ + pairs + .into_iter() + .map(|(uri_str, document)| { + let uri_str = uri_str.as_ref().trim_end_matches('#'); + let uri = uri::from_str(uri_str)?; + let (cow, draft) = document.into_document(); + Ok((Arc::new(uri), (cow, 
draft))) + }) + .collect() } -fn handle_retrieve_error( +pub(crate) fn handle_retrieve_error( uri: &Uri, original: &str, fragmentless: &Uri, @@ -947,7 +851,7 @@ fn handle_retrieve_error( } } -fn validate_custom_metaschemas( +pub(crate) fn validate_custom_metaschemas( custom_metaschemas: &[Arc>], resources: &ResourceMap, ) -> Result<(), Error> { @@ -980,10 +884,11 @@ fn validate_custom_metaschemas( Ok(()) } -fn collect_external_resources( +pub(crate) fn collect_external_resources( base: &Arc>, contents: &Value, - collected: &mut AHashSet<(String, Uri, ReferenceKind)>, + draft: Draft, + collected: &mut AHashSet<(String, Uri, ReferenceKind, Draft)>, seen: &mut ReferenceTracker, resolution_cache: &mut UriCache, scratch: &mut String, @@ -1014,6 +919,7 @@ fn collect_external_resources( collect_external_resources( base, referenced, + draft, collected, seen, resolution_cache, @@ -1060,7 +966,7 @@ fn collect_external_resources( } else { ReferenceKind::Ref }; - collected.insert(($reference.to_string(), resolved, kind)); + collected.insert(($reference.to_string(), resolved, kind, draft)); } } } @@ -1172,7 +1078,8 @@ mod tests { Registry::try_new("http://example.com/schema1", schema).expect("Invalid resources"); // Attempt to create a resolver for a URL not in the registry - let resolver = registry + let context = registry.context(); + let resolver = context .try_resolver("http://example.com/non_existent_schema") .expect("Invalid base URI"); @@ -1206,10 +1113,7 @@ mod tests { })); let error = base_registry - .try_with_resources( - [("http://example.com/custom", custom_schema)], - Draft::default(), - ) + .try_with_resources([("http://example.com/custom", custom_schema)]) .expect_err("Extending registry must fail when the custom $schema is not registered"); let error_msg = error.to_string(); @@ -1238,10 +1142,7 @@ mod tests { registry .clone() - .try_with_resources( - [("http://example.com/schemas/my-schema", schema)], - Draft::default(), - ) + 
.try_with_resources([("http://example.com/schemas/my-schema", schema)]) .expect("Schema should accept registered meta-schema URI with trailing '#'"); } @@ -1516,8 +1417,9 @@ mod tests { .build(input_pairs) .expect("Invalid resources"); // Verify that all expected URIs are resolved and present in resources + let context = registry.context(); for uri in test_case.expected_resolved_uris { - let resolver = registry.try_resolver("").expect("Invalid base URI"); + let resolver = context.try_resolver("").expect("Invalid base URI"); assert!(resolver.lookup(uri).is_ok()); } } @@ -1575,10 +1477,10 @@ mod tests { ); let registry = result.unwrap(); - let resource = registry - .resources - .get(&from_str("http://example.com/schema").expect("Invalid URI")) - .unwrap(); + // Get the resource via context + let context = registry.context(); + let uri = from_str("http://example.com/schema").expect("Invalid URI"); + let resource = context.get_resource(&uri).unwrap(); let properties = resource .contents() .get("properties") @@ -1601,7 +1503,8 @@ mod tests { .clone() .try_with_resource("http://example.com", Resource::from_contents(json!({}))) .expect("Invalid resource"); - let resolver = registry + let context = registry.context(); + let resolver = context .try_resolver("http://127.0.0.1/schema") .expect("Invalid base URI"); assert_eq!( @@ -1616,7 +1519,8 @@ mod tests { .clone() .try_with_resource("http://example.com", Resource::from_contents(json!({}))) .expect("Invalid resource"); - let resolver = registry.try_resolver("").expect("Invalid base URI"); + let context = registry.context(); + let resolver = context.try_resolver("").expect("Invalid base URI"); let resolved = resolver .lookup("http://json-schema.org/draft-06/schema#/definitions/schemaArray") .expect("Lookup failed"); @@ -1630,6 +1534,117 @@ mod tests { ); } + #[test] + fn root_document_registers_nested_ids() { + let registry = Registry::builder() + .build() + .expect("Failed to build empty registry"); + let schema = json!({ 
+ "$id": "https://example.com/root", + "$defs": { + "Foo": { + "$id": "Foo", + "type": "string" + } + } + }); + let base_uri = from_str("https://example.com/root").expect("Invalid URI"); + let context = registry + .context() + .with_root_document(base_uri.clone(), &schema, Draft::Draft202012) + .expect("Root document should be accepted"); + let resolver = context.resolver(base_uri.clone()); + let resolved = resolver + .lookup("https://example.com/Foo") + .expect("Nested $id should resolve"); + assert_eq!( + resolved.contents(), + schema + .pointer("/$defs/Foo") + .expect("Missing $defs.Foo definition") + ); + } + + #[derive(Default)] + struct MapRetriever { + schemas: AHashMap, + } + + impl Retrieve for MapRetriever { + fn retrieve( + &self, + uri: &Uri, + ) -> Result> { + if let Some(value) = self.schemas.get(uri.as_str()) { + Ok(value.clone()) + } else { + Err(Box::new(std::io::Error::new( + std::io::ErrorKind::NotFound, + format!("Missing {uri}"), + ))) + } + } + } + + #[test] + fn try_with_resource_fetches_external_dependencies() { + let mut retriever = MapRetriever::default(); + retriever.schemas.insert( + "http://example.com/external".to_string(), + json!({"type": "string"}), + ); + + let registry = Registry::builder() + .with_document("http://example.com/base", json!({})) + .expect("Failed to add base document") + .with_retriever(retriever) + .build() + .expect("Failed to build registry"); + + let registry = registry + .try_with_resource( + "http://example.com/new", + Resource::from_contents(json!({"$ref": "http://example.com/external"})), + ) + .expect("Failed to extend registry"); + + let context = registry.context(); + let resolver = context + .try_resolver("http://example.com/new") + .expect("Invalid base URI"); + let resolved = resolver + .lookup("http://example.com/external") + .expect("External reference should resolve"); + assert_eq!(resolved.contents(), &json!({"type": "string"})); + } + + #[test] + fn builder_preserves_borrowed_documents() { + let 
schema = json!({ + "$id": "https://example.com/root", + "$defs": { + "Foo": { "type": "string" } + }, + "$ref": "#/$defs/Foo" + }); + + let registry = Registry::builder() + .with_document("https://example.com/root", (&schema, Draft::Draft202012)) + .expect("Failed to add document") + .build() + .expect("Failed to build registry"); + + let context = registry.context(); + let uri = from_str("https://example.com/root").expect("Invalid URI"); + let resource = context + .get_resource(&uri) + .expect("Resource should exist for root document"); + assert!( + resource.contents().pointer("/$defs/Foo").is_some(), + "Borrowed schema lost definitions" + ); + } + #[test] fn test_invalid_reference() { // Found via fuzzing @@ -1733,12 +1748,13 @@ mod async_tests { ); let registry = result.unwrap(); - let resource = registry - .resources - .get(&uri::from_str("http://example.com/schema").expect("Invalid URI")) - .unwrap(); - let properties = resource - .contents() + let uri = uri::from_str("http://example.com/schema").expect("Invalid URI"); + let (document, _) = registry + .documents + .get(&uri) + .expect("Document should be registered"); + let properties = document + .as_ref() .get("properties") .and_then(|v| v.as_object()) .unwrap(); @@ -1769,7 +1785,18 @@ mod async_tests { .await .expect("Invalid resource"); - let resolver = registry.try_resolver("").expect("Invalid base URI"); + let registry = registry + .try_with_resource_async( + "http://example.com/new", + Resource::from_contents(json!({"$ref": "http://example.com/schema2"})), + ) + .await + .expect("Failed to extend registry"); + + let context = registry.context(); + let resolver = context + .try_resolver("http://example.com/new") + .expect("Invalid base URI"); let resolved = resolver .lookup("http://example.com/schema2") .expect("Lookup failed"); @@ -1806,7 +1833,8 @@ mod async_tests { .await .expect("Invalid resource"); - let resolver = registry.try_resolver("").expect("Invalid base URI"); + let context = 
registry.context(); + let resolver = context.try_resolver("").expect("Invalid base URI"); // Check both references are resolved correctly let resolved2 = resolver @@ -1859,7 +1887,8 @@ mod async_tests { .await .expect("Invalid resource"); - let resolver = registry.try_resolver("").expect("Invalid base URI"); + let context = registry.context(); + let resolver = context.try_resolver("").expect("Invalid base URI"); // Verify nested reference resolution let resolved = resolver diff --git a/crates/jsonschema-referencing/src/resolver.rs b/crates/jsonschema-referencing/src/resolver.rs index e3d39ee1..1d798355 100644 --- a/crates/jsonschema-referencing/src/resolver.rs +++ b/crates/jsonschema-referencing/src/resolver.rs @@ -4,14 +4,16 @@ use std::sync::Arc; use fluent_uri::Uri; use serde_json::Value; -use crate::{list::List, resource::JsonSchemaResource, Draft, Error, Registry, ResourceRef}; +use crate::{ + list::List, resource::JsonSchemaResource, Draft, Error, ResolutionContext, ResourceRef, +}; /// A reference resolver. /// -/// Resolves references against the base URI and looks up the result in the registry. +/// Resolves references against the base URI and looks up the result in the resolution context. #[derive(Clone)] pub struct Resolver<'r> { - pub(crate) registry: &'r Registry, + pub(crate) context: &'r ResolutionContext<'r>, base_uri: Arc>, scopes: List>, } @@ -45,10 +47,10 @@ impl fmt::Debug for Resolver<'_> { } impl<'r> Resolver<'r> { - /// Create a new `Resolver` with the given registry and base URI. - pub(crate) fn new(registry: &'r Registry, base_uri: Arc>) -> Self { + /// Create a new `Resolver` with the given resolution context and base URI. 
+ pub(crate) fn new(context: &'r ResolutionContext<'r>, base_uri: Arc>) -> Self { Self { - registry, + context, base_uri, scopes: List::new(), } @@ -72,12 +74,13 @@ impl<'r> Resolver<'r> { (reference, "") }; let uri = self - .registry + .context + .resolution_cache() .resolve_against(&self.base_uri.borrow(), uri)?; (uri, fragment) }; - let Some(retrieved) = self.registry.resources.get(&*uri) else { + let Some(retrieved) = self.context.get_resource(&uri) else { return Err(Error::unretrievable( uri.as_str(), "Retrieving external resources is not supported once the registry is populated" @@ -91,7 +94,7 @@ impl<'r> Resolver<'r> { } if !fragment.is_empty() { - let retrieved = self.registry.anchor(&uri, fragment)?; + let retrieved = self.context.anchor(&uri, fragment)?; let resolver = self.evolve(uri); return retrieved.resolve(resolver); } @@ -160,9 +163,9 @@ impl<'r> Resolver<'r> { subresource: &impl JsonSchemaResource, ) -> Result { if let Some(id) = subresource.id() { - let base_uri = self.registry.resolve_against(&self.base_uri.borrow(), id)?; + let base_uri = self.context.resolve_against(&self.base_uri.borrow(), id)?; Ok(Resolver { - registry: self.registry, + context: self.context, base_uri, scopes: self.scopes.clone(), }) @@ -179,13 +182,13 @@ impl<'r> Resolver<'r> { && (self.scopes.is_empty() || base_uri != self.base_uri) { Resolver { - registry: self.registry, + context: self.context, base_uri, scopes: self.scopes.push_front(self.base_uri.clone()), } } else { Resolver { - registry: self.registry, + context: self.context, base_uri, scopes: self.scopes.clone(), } @@ -197,7 +200,7 @@ impl<'r> Resolver<'r> { /// /// If the reference is invalid. 
pub fn resolve_against(&self, base: &Uri<&str>, uri: &str) -> Result>, Error> { - self.registry.resolve_against(base, uri) + self.context.resolve_against(base, uri) } } diff --git a/crates/jsonschema-referencing/src/resource.rs b/crates/jsonschema-referencing/src/resource.rs index becb8e7b..126ba3a0 100644 --- a/crates/jsonschema-referencing/src/resource.rs +++ b/crates/jsonschema-referencing/src/resource.rs @@ -17,6 +17,60 @@ pub(crate) trait JsonSchemaResource { } } +/// Trait for types that can be converted into a document for registry storage. +/// +/// This trait provides a unified interface for adding documents to a registry, +/// supporting both borrowed and owned schemas with optional draft specification. +pub trait IntoDocument<'doc> { + /// Convert this type into a `Cow<'doc, Value>` and detect or use the provided draft. + fn into_document(self) -> (Cow<'doc, Value>, Draft); +} + +// Borrowed schema - auto-detect draft +impl<'doc> IntoDocument<'doc> for &'doc Value { + fn into_document(self) -> (Cow<'doc, Value>, Draft) { + let draft = Draft::default().detect(self); + (Cow::Borrowed(self), draft) + } +} + +// Owned schema - auto-detect draft +impl<'doc> IntoDocument<'doc> for Value { + fn into_document(self) -> (Cow<'doc, Value>, Draft) { + let draft = Draft::default().detect(&self); + (Cow::Owned(self), draft) + } +} + +// Borrowed schema with explicit draft +impl<'doc> IntoDocument<'doc> for (&'doc Value, Draft) { + fn into_document(self) -> (Cow<'doc, Value>, Draft) { + (Cow::Borrowed(self.0), self.1) + } +} + +// Owned schema with explicit draft +impl<'doc> IntoDocument<'doc> for (Value, Draft) { + fn into_document(self) -> (Cow<'doc, Value>, Draft) { + (Cow::Owned(self.0), self.1) + } +} + +// Existing Resource type +impl<'doc> IntoDocument<'doc> for Resource { + fn into_document(self) -> (Cow<'doc, Value>, Draft) { + let (draft, contents) = self.into_inner(); + (Cow::Owned(contents), draft) + } +} + +// Implementation for re-adding documents from an 
existing registry +impl<'doc> IntoDocument<'doc> for (&'doc Cow<'doc, Value>, Draft) { + fn into_document(self) -> (Cow<'doc, Value>, Draft) { + (Cow::Borrowed(self.0.as_ref()), self.1) + } +} + /// An owned document with a concrete interpretation under a JSON Schema specification. #[derive(Debug, Clone, PartialEq, Eq)] pub struct Resource { @@ -292,7 +346,7 @@ mod tests { assert_eq!(unescaped, double_replaced, "Failed for: {input}"); } - fn create_test_registry() -> Registry { + fn create_test_registry() -> Registry<'static> { let schema = Draft::Draft202012.create_resource(json!({ "type": "object", "properties": { @@ -313,7 +367,8 @@ mod tests { })); let registry = Registry::try_new("http://example.com", schema.clone()).expect("Invalid resources"); - let resolver = registry + let context = registry.context(); + let resolver = context .try_resolver("http://example.com") .expect("Invalid base URI"); @@ -340,7 +395,8 @@ mod tests { #[test] fn test_percent_encoded_non_utf8() { let registry = create_test_registry(); - let resolver = registry + let context = registry.context(); + let resolver = context .try_resolver("http://example.com") .expect("Invalid base URI"); @@ -356,7 +412,8 @@ mod tests { #[test] fn test_array_index_as_string() { let registry = create_test_registry(); - let resolver = registry + let context = registry.context(); + let resolver = context .try_resolver("http://example.com") .expect("Invalid base URI"); @@ -372,7 +429,8 @@ mod tests { #[test] fn test_array_index_out_of_bounds() { let registry = create_test_registry(); - let resolver = registry + let context = registry.context(); + let resolver = context .try_resolver("http://example.com") .expect("Invalid base URI"); @@ -386,7 +444,8 @@ mod tests { #[test] fn test_unknown_property() { let registry = create_test_registry(); - let resolver = registry + let context = registry.context(); + let resolver = context .try_resolver("http://example.com") .expect("Invalid base URI"); diff --git 
a/crates/jsonschema-referencing/tests/suite.rs b/crates/jsonschema-referencing/tests/suite.rs index 967392a1..8234c9be 100644 --- a/crates/jsonschema-referencing/tests/suite.rs +++ b/crates/jsonschema-referencing/tests/suite.rs @@ -49,7 +49,8 @@ fn test_suite(draft: &'static str, test: Test) { .map(|(uri, content)| (uri, draft.create_resource(content))), ) .expect("Invalid registry"); - let resolver = registry + let context = registry.context(); + let resolver = context .try_resolver(test.base_uri.unwrap_or_default()) .expect("Invalid base URI"); if test.error.is_some() { diff --git a/crates/jsonschema/src/compiler.rs b/crates/jsonschema/src/compiler.rs index 35186bb1..52029b46 100644 --- a/crates/jsonschema/src/compiler.rs +++ b/crates/jsonschema/src/compiler.rs @@ -19,11 +19,10 @@ use crate::{ }; use ahash::{AHashMap, AHashSet}; use referencing::{ - uri, Draft, List, Registry, Resolved, Resolver, Resource, ResourceRef, Uri, Vocabulary, - VocabularySet, + uri, Draft, List, Registry, Resolved, Resolver, ResourceRef, Uri, Vocabulary, VocabularySet, }; use serde_json::{Map, Value}; -use std::{borrow::Cow, cell::RefCell, iter::once, rc::Rc, sync::Arc}; +use std::{cell::RefCell, rc::Rc, sync::Arc}; const DEFAULT_SCHEME: &str = "json-schema"; pub(crate) const DEFAULT_BASE_URI: &str = "json-schema:///"; @@ -117,24 +116,26 @@ impl SharedContextState { /// Per-location view used while compiling schemas into validators. 
#[derive(Debug, Clone)] -pub(crate) struct Context<'a> { - config: &'a ValidationOptions, - pub(crate) registry: &'a Registry, - resolver: Resolver<'a>, +pub(crate) struct Context<'a, 'doc> { + config: &'a ValidationOptions<'doc>, + pub(crate) registry: &'doc Registry<'doc>, + resolver: Resolver<'doc>, vocabularies: VocabularySet, location: Location, pub(crate) draft: Draft, shared: SharedContextState, + pattern_options: PatternEngineOptions, } -impl<'a> Context<'a> { +impl<'a, 'doc> Context<'a, 'doc> { pub(crate) fn new( - config: &'a ValidationOptions, - registry: &'a Registry, - resolver: Resolver<'a>, + config: &'a ValidationOptions<'doc>, + registry: &'doc Registry<'doc>, + resolver: Resolver<'doc>, vocabularies: VocabularySet, draft: Draft, location: Location, + pattern_options: PatternEngineOptions, ) -> Self { Context { config, @@ -144,20 +145,25 @@ impl<'a> Context<'a> { vocabularies, draft, shared: SharedContextState::new(), + pattern_options, } } pub(crate) fn draft(&self) -> Draft { self.draft } - pub(crate) fn config(&self) -> &ValidationOptions { + #[allow(dead_code)] + pub(crate) fn config(&self) -> &ValidationOptions<'doc> { self.config } + pub(crate) const fn pattern_options(&self) -> PatternEngineOptions { + self.pattern_options + } /// Create a context for this schema. 
pub(crate) fn in_subresource( &'a self, resource: ResourceRef<'_>, - ) -> Result, referencing::Error> { + ) -> Result, referencing::Error> { let resolver = self.resolver.in_subresource(resource)?; Ok(Context { config: self.config, @@ -167,6 +173,7 @@ impl<'a> Context<'a> { draft: resource.draft(), location: self.location.clone(), shared: self.shared.clone(), + pattern_options: self.pattern_options, }) } pub(crate) fn as_resource_ref<'r>(&'a self, contents: &'r Value) -> ResourceRef<'r> { @@ -184,6 +191,7 @@ impl<'a> Context<'a> { location, draft: self.draft, shared: self.shared.clone(), + pattern_options: self.pattern_options, } } pub(crate) fn lookup(&'a self, reference: &str) -> Result, referencing::Error> { @@ -264,7 +272,7 @@ impl<'a> Context<'a> { draft: Draft, vocabularies: VocabularySet, location: Location, - ) -> Context<'a> { + ) -> Context<'a, 'a> { Context { config: self.config, registry: self.registry, @@ -273,6 +281,7 @@ impl<'a> Context<'a> { vocabularies, location, shared: self.shared.clone(), + pattern_options: self.pattern_options, } } pub(crate) fn get_content_media_type_check( @@ -506,7 +515,7 @@ impl<'a> Context<'a> { } } - let (backtrack_limit, size_limit, dfa_size_limit) = match self.config.pattern_options() { + let (backtrack_limit, size_limit, dfa_size_limit) = match self.pattern_options { PatternEngineOptions::FancyRegex { backtrack_limit, size_limit, @@ -549,7 +558,7 @@ impl<'a> Context<'a> { } } - let (size_limit, dfa_size_limit) = match self.config.pattern_options() { + let (size_limit, dfa_size_limit) = match self.pattern_options { PatternEngineOptions::Regex { size_limit, dfa_size_limit, @@ -608,46 +617,80 @@ impl<'a> Context<'a> { } pub(crate) fn build_validator( - config: &ValidationOptions, + config: &ValidationOptions<'_>, schema: &Value, ) -> Result> { - let draft = config.draft_for(schema)?; + // Detect draft inline to avoid lifetime issues + // Preference: Explicitly set -> Autodetected (with registry resolution) -> Default + 
let draft = if let Some(draft) = config.draft { + draft + } else { + let default = Draft::default(); + let detected = default.detect(schema); + + // If detected draft is Unknown (custom meta-schema), try to resolve it + if detected == Draft::Unknown { + if let Some(registry) = config.registry { + if let Some(meta_schema_uri) = schema + .as_object() + .and_then(|obj| obj.get("$schema")) + .and_then(|s| s.as_str()) + { + // Walk the meta-schema chain to find the underlying draft + ValidationOptions::>::resolve_draft_from_registry(meta_schema_uri, registry)? + } else { + detected + } + } else { + detected + } + } else { + detected + } + }; + let resource_ref = draft.create_resource_ref(schema); - let resource = draft.create_resource(schema.clone()); let base_uri = if let Some(base_uri) = config.base_uri.as_ref() { uri::from_str(base_uri)? } else { uri::from_str(resource_ref.id().unwrap_or(DEFAULT_BASE_URI))? }; - // Build a registry & resolver needed for validator compilation - // Clone resources to drain them without mutating the original config - let pairs = collect_resource_pairs(base_uri.as_str(), resource, config.resources.clone()); - - let registry = if let Some(ref registry) = config.registry { - Arc::new(registry.clone().try_with_resources_and_retriever( - pairs, - &*config.retriever, - draft, - )?) + // Build registry with retriever if not provided + let owned_registry; + let (registry, needs_root_document): (&Registry, bool) = if let Some(registry) = config.registry + { + (registry, true) } else { - Arc::new( - Registry::options() - .draft(draft) - .retriever(Arc::clone(&config.retriever)) - .build(pairs)?, - ) + // Build a registry with the root schema and retriever + // (SPECIFICATIONS meta-schemas are included automatically) + owned_registry = Registry::builder() + .with_document(base_uri.as_str(), (schema, draft))? 
+ .with_retriever(config.retriever.clone()) + .build()?; + (&owned_registry, false) }; let vocabularies = registry.find_vocabularies(draft, schema); - let resolver = registry.resolver(base_uri); + // Create resolution context with root document + let context = if needs_root_document { + registry + .context() + .with_root_document(base_uri.clone(), schema, draft)? + } else { + registry.context() + }; + let resolver = context.resolver(base_uri); + + let pattern_options = config.pattern_options(); let ctx = Context::new( config, - &registry, + registry, resolver, vocabularies, draft, Location::new(), + pattern_options, ); // Validate the schema itself @@ -663,53 +706,55 @@ pub(crate) fn build_validator( #[cfg(feature = "resolve-async")] pub(crate) async fn build_validator_async( - config: &ValidationOptions>, + config: &ValidationOptions<'_, Arc>, schema: &Value, ) -> Result> { - let draft = config.draft_for(schema).await?; + let draft = config.draft_for_async(schema).await?; let resource_ref = draft.create_resource_ref(schema); - let resource = draft.create_resource(schema.clone()); let base_uri = if let Some(base_uri) = config.base_uri.as_ref() { uri::from_str(base_uri)? } else { uri::from_str(resource_ref.id().unwrap_or(DEFAULT_BASE_URI))? }; - // Clone resources to drain them without mutating the original config - let pairs = collect_resource_pairs(base_uri.as_str(), resource, config.resources.clone()); - - let registry = if let Some(ref registry) = config.registry { - Arc::new( - registry - .clone() - .try_with_resources_and_retriever_async(pairs, &*config.retriever, draft) - .await?, - ) + let owned_registry; + let (registry, needs_root_document): (&Registry, bool) = if let Some(registry) = config.registry + { + (registry, true) } else { - Arc::new( - Registry::options() - .async_retriever(Arc::clone(&config.retriever)) - .draft(draft) - .build(pairs) - .await?, - ) + owned_registry = Registry::builder() + .with_document(base_uri.as_str(), (schema, draft))?
+ .with_async_retriever(Arc::clone(&config.retriever)) + .build_async() + .await?; + (&owned_registry, false) }; - let vocabularies = registry.find_vocabularies(draft, schema); - let resolver = registry.resolver(base_uri); + + // Create resolution context with root document + let context = if needs_root_document { + registry + .context() + .with_root_document(base_uri.clone(), schema, draft)? + } else { + registry.context() + }; + let resolver = context.resolver(base_uri); // HACK: `ValidationOptions` struct has a default type parameter as `Arc` and to // avoid propagating types everywhere in `Context`, it is easier to just replace the // retriever to one that implements `Retrieve`, as it is not used anymore anyway. let config_with_blocking_retriever = config .clone() .with_blocking_retriever(crate::retriever::DefaultRetriever); + let pattern_options = config.pattern_options(); let ctx = Context::new( &config_with_blocking_retriever, - &registry, + registry, resolver, vocabularies, draft, Location::new(), + pattern_options, ); if config.validate_schema { @@ -729,18 +774,6 @@ fn annotations_to_value(annotations: AHashMap) -> Arc { Arc::new(Value::Object(object)) } -fn collect_resource_pairs( - base_uri: &str, - resource: Resource, - resources: AHashMap, -) -> impl IntoIterator, Resource)> { - once((Cow::Borrowed(base_uri), resource)).chain( - resources - .into_iter() - .map(|(uri, resource)| (Cow::Owned(uri), resource)), - ) -} - fn validate_schema(draft: Draft, schema: &Value) -> Result<(), ValidationError<'static>> { // Boolean schemas are always valid per the spec, skip validation if schema.is_boolean() { diff --git a/crates/jsonschema/src/keywords/additional_properties.rs b/crates/jsonschema/src/keywords/additional_properties.rs index d993abdf..7ad0d8d4 100644 --- a/crates/jsonschema/src/keywords/additional_properties.rs +++ b/crates/jsonschema/src/keywords/additional_properties.rs @@ -1196,7 +1196,7 @@ pub(crate) fn compile<'a>( if let Some(patterns) =
parent.get("patternProperties") { if let Value::Object(obj) = patterns { // Compile all patterns & their validators to avoid doing work in the `patternProperties` validator - match ctx.config().pattern_options() { + match ctx.pattern_options() { PatternEngineOptions::FancyRegex { .. } => { let patterns = match compile_fancy_regex_patterns(ctx, obj) { Ok(patterns) => patterns, diff --git a/crates/jsonschema/src/keywords/mod.rs b/crates/jsonschema/src/keywords/mod.rs index 32003f47..5f5fe0f6 100644 --- a/crates/jsonschema/src/keywords/mod.rs +++ b/crates/jsonschema/src/keywords/mod.rs @@ -172,7 +172,7 @@ impl fmt::Display for Keyword { } pub(crate) fn get_for_draft<'a>( - ctx: &compiler::Context<'a>, + ctx: &compiler::Context<'a, 'a>, keyword: &'a str, ) -> Option<(Keyword, CompileFunc<'a>)> { match (ctx.draft(), keyword) { diff --git a/crates/jsonschema/src/keywords/pattern.rs b/crates/jsonschema/src/keywords/pattern.rs index 95be853c..08138aa8 100644 --- a/crates/jsonschema/src/keywords/pattern.rs +++ b/crates/jsonschema/src/keywords/pattern.rs @@ -64,7 +64,7 @@ pub(crate) fn compile<'a>( schema: &'a Value, ) -> Option> { match schema { - Value::String(item) => match ctx.config().pattern_options() { + Value::String(item) => match ctx.pattern_options() { PatternEngineOptions::FancyRegex { .. } => { let Ok(regex) = ctx.get_or_compile_regex(item) else { return Some(Err(invalid_regex(ctx, schema))); diff --git a/crates/jsonschema/src/keywords/pattern_properties.rs b/crates/jsonschema/src/keywords/pattern_properties.rs index f481b5ed..d4b01e02 100644 --- a/crates/jsonschema/src/keywords/pattern_properties.rs +++ b/crates/jsonschema/src/keywords/pattern_properties.rs @@ -183,7 +183,7 @@ pub(crate) fn compile<'a>( ))); }; let ctx = ctx.new_at_location("patternProperties"); - let result = match ctx.config().pattern_options() { + let result = match ctx.pattern_options() { PatternEngineOptions::FancyRegex { .. 
} => { compile_pattern_entries(&ctx, map, |pctx, pattern, subschema| { pctx.get_or_compile_regex(pattern) diff --git a/crates/jsonschema/src/keywords/unevaluated_items.rs b/crates/jsonschema/src/keywords/unevaluated_items.rs index c5d381b3..3a1284f9 100644 --- a/crates/jsonschema/src/keywords/unevaluated_items.rs +++ b/crates/jsonschema/src/keywords/unevaluated_items.rs @@ -201,7 +201,7 @@ impl ConditionalValidators { /// can evaluate items. Handles circular references via pending nodes cached /// by location and schema pointer. fn compile_items_validators<'a>( - ctx: &compiler::Context<'_>, + ctx: &compiler::Context<'_, '_>, parent: &'a Map, ) -> Result> { let unevaluated = compile_unevaluated(ctx, parent)?; @@ -236,7 +236,7 @@ fn compile_items_validators<'a>( } fn compile_unevaluated<'a>( - ctx: &compiler::Context<'_>, + ctx: &compiler::Context<'_, '_>, parent: &'a Map, ) -> Result, ValidationError<'a>> { if let Some(subschema) = parent.get("unevaluatedItems") { @@ -251,7 +251,7 @@ fn compile_unevaluated<'a>( } fn compile_contains<'a>( - ctx: &compiler::Context<'_>, + ctx: &compiler::Context<'_, '_>, parent: &'a Map, ) -> Result, ValidationError<'a>> { if let Some(subschema) = parent.get("contains") { @@ -266,7 +266,7 @@ fn compile_contains<'a>( } fn compile_ref<'a>( - ctx: &compiler::Context<'_>, + ctx: &compiler::Context<'_, '_>, parent: &'a Map, ) -> Result, ValidationError<'a>> { if let Some(Value::String(reference)) = parent.get("$ref") { @@ -281,7 +281,7 @@ fn compile_ref<'a>( } fn compile_dynamic_ref<'a>( - ctx: &compiler::Context<'_>, + ctx: &compiler::Context<'_, '_>, parent: &'a Map, ) -> Result>, ValidationError<'a>> { if let Some(Value::String(reference)) = parent.get("$dynamicRef") { @@ -296,7 +296,7 @@ fn compile_dynamic_ref<'a>( } fn compile_recursive_ref<'a>( - ctx: &compiler::Context<'_>, + ctx: &compiler::Context<'_, '_>, parent: &Map, ) -> Result, ValidationError<'a>> { if !parent.contains_key("$recursiveRef") { @@ -338,7 +338,7 @@ fn 
compile_recursive_ref<'a>( } fn compile_items<'a>( - ctx: &compiler::Context<'_>, + ctx: &compiler::Context<'_, '_>, parent: &'a Map, ) -> Result<(Option, bool), ValidationError<'a>> { if let Some(subschema) = parent.get("items") { @@ -364,7 +364,7 @@ fn compile_items<'a>( } fn compile_prefix_items<'a>( - _ctx: &compiler::Context<'_>, + _ctx: &compiler::Context<'_, '_>, parent: &'a Map, ) -> Result, ValidationError<'a>> { if let Some(Some(items)) = parent.get("prefixItems").map(Value::as_array) { @@ -375,7 +375,7 @@ fn compile_prefix_items<'a>( } fn compile_conditional<'a>( - ctx: &compiler::Context<'_>, + ctx: &compiler::Context<'_, '_>, parent: &'a Map, ) -> Result>, ValidationError<'a>> { if let Some(subschema) = parent.get("if") { @@ -414,7 +414,7 @@ fn compile_conditional<'a>( } fn compile_all_of<'a>( - ctx: &compiler::Context<'_>, + ctx: &compiler::Context<'_, '_>, parent: &'a Map, ) -> Result>, ValidationError<'a>> { if let Some(Some(subschemas)) = parent.get("allOf").map(Value::as_array) { @@ -440,7 +440,7 @@ fn compile_all_of<'a>( } fn compile_any_of<'a>( - ctx: &compiler::Context<'_>, + ctx: &compiler::Context<'_, '_>, parent: &'a Map, ) -> Result>, ValidationError<'a>> { if let Some(Some(subschemas)) = parent.get("anyOf").map(Value::as_array) { @@ -466,7 +466,7 @@ fn compile_any_of<'a>( } fn compile_one_of<'a>( - ctx: &compiler::Context<'_>, + ctx: &compiler::Context<'_, '_>, parent: &'a Map, ) -> Result>, ValidationError<'a>> { if let Some(Some(subschemas)) = parent.get("oneOf").map(Value::as_array) { diff --git a/crates/jsonschema/src/keywords/unevaluated_properties.rs b/crates/jsonschema/src/keywords/unevaluated_properties.rs index 3b82270a..ff345c22 100644 --- a/crates/jsonschema/src/keywords/unevaluated_properties.rs +++ b/crates/jsonschema/src/keywords/unevaluated_properties.rs @@ -216,7 +216,7 @@ impl ConditionalValidators { /// can evaluate properties. Handles circular references via pending nodes cached /// by location and schema pointer. 
fn compile_property_validators<'a>( - ctx: &compiler::Context<'_>, + ctx: &compiler::Context<'_, '_>, parent: &'a Map, ) -> Result> { // Create a pending node and cache it before compiling to handle circular refs @@ -254,7 +254,7 @@ fn compile_property_validators<'a>( } fn compile_properties<'a>( - ctx: &compiler::Context<'_>, + ctx: &compiler::Context<'_, '_>, parent: &'a Map, ) -> Result, ValidationError<'a>> { let Some(Value::Object(map)) = parent.get("properties") else { @@ -275,7 +275,7 @@ fn compile_properties<'a>( } fn compile_additional<'a>( - ctx: &compiler::Context<'_>, + ctx: &compiler::Context<'_, '_>, parent: &'a Map, ) -> Result, ValidationError<'a>> { let Some(subschema) = parent.get("additionalProperties") else { @@ -289,7 +289,7 @@ fn compile_additional<'a>( } fn compile_pattern_properties<'a>( - ctx: &compiler::Context<'_>, + ctx: &compiler::Context<'_, '_>, parent: &'a Map, ) -> Result, ValidationError<'a>> { let Some(Value::Object(patterns)) = parent.get("patternProperties") else { @@ -319,7 +319,7 @@ fn compile_pattern_properties<'a>( } fn compile_unevaluated<'a>( - ctx: &compiler::Context<'_>, + ctx: &compiler::Context<'_, '_>, parent: &'a Map, ) -> Result, ValidationError<'a>> { let Some(subschema) = parent.get("unevaluatedProperties") else { @@ -333,7 +333,7 @@ fn compile_unevaluated<'a>( } fn compile_all_of<'a>( - ctx: &compiler::Context<'_>, + ctx: &compiler::Context<'_, '_>, parent: &'a Map, ) -> Result, ValidationError<'a>> { let Some(Some(subschemas)) = parent.get("allOf").map(Value::as_array) else { @@ -358,7 +358,7 @@ fn compile_all_of<'a>( } fn compile_any_of<'a>( - ctx: &compiler::Context<'_>, + ctx: &compiler::Context<'_, '_>, parent: &'a Map, ) -> Result, ValidationError<'a>> { let Some(Some(subschemas)) = parent.get("anyOf").map(Value::as_array) else { @@ -383,7 +383,7 @@ fn compile_any_of<'a>( } fn compile_one_of<'a>( - ctx: &compiler::Context<'_>, + ctx: &compiler::Context<'_, '_>, parent: &'a Map, ) -> Result, 
ValidationError<'a>> { let Some(Some(subschemas)) = parent.get("oneOf").map(Value::as_array) else { @@ -408,7 +408,7 @@ fn compile_one_of<'a>( } fn compile_conditional<'a>( - ctx: &compiler::Context<'_>, + ctx: &compiler::Context<'_, '_>, parent: &'a Map, ) -> Result>, ValidationError<'a>> { let Some(Value::Object(if_schema)) = parent.get("if") else { @@ -446,7 +446,7 @@ fn compile_conditional<'a>( } fn compile_ref<'a>( - ctx: &compiler::Context<'_>, + ctx: &compiler::Context<'_, '_>, parent: &Map, ) -> Result, ValidationError<'a>> { let Some(Value::String(reference)) = parent.get("$ref") else { @@ -469,7 +469,7 @@ fn compile_ref<'a>( } fn compile_dynamic_ref<'a>( - ctx: &compiler::Context<'_>, + ctx: &compiler::Context<'_, '_>, parent: &Map, ) -> Result>, ValidationError<'a>> { let Some(Value::String(reference)) = parent.get("$dynamicRef") else { @@ -492,7 +492,7 @@ fn compile_dynamic_ref<'a>( } fn compile_recursive_ref<'a>( - ctx: &compiler::Context<'_>, + ctx: &compiler::Context<'_, '_>, parent: &Map, ) -> Result, ValidationError<'a>> { if !parent.contains_key("$recursiveRef") { @@ -534,7 +534,7 @@ fn compile_recursive_ref<'a>( } fn compile_dependent<'a>( - ctx: &compiler::Context<'_>, + ctx: &compiler::Context<'_, '_>, parent: &'a Map, ) -> Result, ValidationError<'a>> { let Some(Value::Object(map)) = parent.get("dependentSchemas") else { diff --git a/crates/jsonschema/src/lib.rs b/crates/jsonschema/src/lib.rs index c27bee74..31b2f349 100644 --- a/crates/jsonschema/src/lib.rs +++ b/crates/jsonschema/src/lib.rs @@ -531,7 +531,7 @@ //! ```rust //! # fn main() -> Result<(), Box> { //! use serde_json::json; -//! use jsonschema::Resource; +//! use referencing::Registry; //! //! // Root schema with multiple definitions //! let root_schema = json!({ @@ -560,8 +560,14 @@ //! let user_schema = json!({"$ref": "https://example.com/root#/definitions/User"}); //! //! // Register the root schema and build validator for the specific definition +//! 
let registry = Registry::builder() +//! .with_document("https://example.com/root", root_schema.clone()) +//! .expect("Failed to register documents") +//! .build() +//! .expect("Failed to build registry"); +//! //! let validator = jsonschema::options() -//! .with_resource("https://example.com/root", Resource::from_contents(root_schema)) +//! .with_registry(&registry) //! .build(&user_schema)?; //! //! // Now validate data against just the User definition @@ -1105,7 +1111,7 @@ pub async fn async_validator_for(schema: &Value) -> Result ValidationOptions { +pub fn options<'doc>() -> ValidationOptions<'doc> { Validator::options() } @@ -1171,7 +1177,7 @@ pub fn options() -> ValidationOptions { /// See [`ValidationOptions`] for all available configuration options. #[cfg(feature = "resolve-async")] #[must_use] -pub fn async_options() -> ValidationOptions> { +pub fn async_options<'doc>() -> ValidationOptions<'doc, std::sync::Arc> { Validator::async_options() } @@ -1208,21 +1214,21 @@ pub mod meta { /// }); /// /// assert!(jsonschema::meta::options() - /// .with_registry(registry) + /// .with_registry(&registry) /// .is_valid(&schema)); /// ``` #[must_use] - pub fn options() -> MetaSchemaOptions { + pub fn options<'doc>() -> MetaSchemaOptions<'doc> { MetaSchemaOptions::default() } /// Options for meta-schema validation. #[derive(Clone, Default)] - pub struct MetaSchemaOptions { - registry: Option, + pub struct MetaSchemaOptions<'doc> { + registry: Option<&'doc Registry<'doc>>, } - impl MetaSchemaOptions { + impl<'doc> MetaSchemaOptions<'doc> { /// Use a registry for resolving custom meta-schemas. 
/// /// # Examples @@ -1242,10 +1248,10 @@ pub mod meta { /// ).unwrap(); /// /// let options = jsonschema::meta::options() - /// .with_registry(registry); + /// .with_registry(&registry); /// ``` #[must_use] - pub fn with_registry(mut self, registry: Registry) -> Self { + pub fn with_registry(mut self, registry: &'doc Registry<'doc>) -> Self { self.registry = Some(registry); self } @@ -1257,7 +1263,7 @@ pub mod meta { /// Panics if the meta-schema cannot be resolved. #[must_use] pub fn is_valid(&self, schema: &Value) -> bool { - match try_meta_validator_for(schema, self.registry.as_ref()) { + match try_meta_validator_for(schema, self.registry) { Ok(validator) => validator.as_ref().is_valid(schema), Err(e) => panic!("Failed to resolve meta-schema: {e}"), } @@ -1269,7 +1275,7 @@ pub mod meta { /// /// Returns [`ValidationError`] if the schema is invalid or if the meta-schema cannot be resolved. pub fn validate<'a>(&self, schema: &'a Value) -> Result<(), ValidationError<'a>> { - let validator = try_meta_validator_for(schema, self.registry.as_ref())?; + let validator = try_meta_validator_for(schema, self.registry)?; validator.as_ref().validate(schema) } } @@ -1514,9 +1520,9 @@ pub mod meta { try_meta_validator_for(schema, None) } - fn try_meta_validator_for<'a>( + fn try_meta_validator_for<'a, 'doc>( schema: &Value, - registry: Option<&Registry>, + registry: Option<&'doc Registry<'doc>>, ) -> Result, ValidationError<'static>> { let draft = Draft::default().detect(schema); @@ -1531,9 +1537,10 @@ pub mod meta { if let Some(registry) = registry { let (custom_meta_schema, resolved_draft) = resolve_meta_schema_with_registry(meta_schema_uri, registry)?; + // Create ValidationOptions with appropriate lifetime for registry let validator = crate::options() .with_draft(resolved_draft) - .with_registry(registry.clone()) + .with_registry(registry) .without_schema_validation() .build(&custom_meta_schema)?; return Ok(MetaValidator::owned(validator)); @@ -1557,12 +1564,14 @@ pub mod 
meta { uri: &str, registry: &Registry, ) -> Result<(Value, Draft), ValidationError<'static>> { - let resolver = registry.try_resolver(uri)?; + let context = registry.context(); + let resolver = context.try_resolver(uri)?; let first_resolved = resolver.lookup("")?; let first_meta_schema = first_resolved.contents().clone(); let draft = walk_meta_schema_chain(uri, |current_uri| { - let resolver = registry.try_resolver(current_uri)?; + let context = registry.context(); + let resolver = context.try_resolver(current_uri)?; let resolved = resolver.lookup("")?; Ok(resolved.contents().clone()) })?; @@ -1730,7 +1739,7 @@ pub mod draft4 { /// /// See [`ValidationOptions`] for all available configuration options. #[must_use] - pub fn options() -> ValidationOptions { + pub fn options() -> ValidationOptions<'static> { crate::options().with_draft(Draft::Draft4) } @@ -1911,7 +1920,7 @@ pub mod draft6 { /// /// See [`ValidationOptions`] for all available configuration options. #[must_use] - pub fn options() -> ValidationOptions { + pub fn options() -> ValidationOptions<'static> { crate::options().with_draft(Draft::Draft6) } @@ -2092,7 +2101,7 @@ pub mod draft7 { /// /// See [`ValidationOptions`] for all available configuration options. #[must_use] - pub fn options() -> ValidationOptions { + pub fn options() -> ValidationOptions<'static> { crate::options().with_draft(Draft::Draft7) } @@ -2273,7 +2282,7 @@ pub mod draft201909 { /// /// See [`ValidationOptions`] for all available configuration options. #[must_use] - pub fn options() -> ValidationOptions { + pub fn options() -> ValidationOptions<'static> { crate::options().with_draft(Draft::Draft201909) } @@ -2456,7 +2465,7 @@ pub mod draft202012 { /// /// See [`ValidationOptions`] for all available configuration options. 
#[must_use] - pub fn options() -> ValidationOptions { + pub fn options() -> ValidationOptions<'static> { crate::options().with_draft(Draft::Draft202012) } @@ -3144,15 +3153,16 @@ mod tests { } }); - // Register the custom meta-schema as a resource - let resources = vec![( - "http://custom.example.com/schema".to_string(), + // Register the custom meta-schema in a registry + let registry = Registry::try_new( + "http://custom.example.com/schema", Resource::from_contents(meta_schema), - )]; + ) + .expect("Should create registry"); let validator = crate::options() .without_schema_validation() - .with_resources(resources.into_iter()) + .with_registry(&registry) .build(&schema) .expect("Should build validator"); @@ -3278,21 +3288,22 @@ mod tests { "type": "object" }); - // Build the validator with both the meta-schema and the element schema as resources - let resources = vec![ + // Build a registry with both the meta-schema and the element schema + let registry = Registry::try_from_resources([ ( - "http://example.com/meta/schema".to_string(), + "http://example.com/meta/schema", referencing::Resource::from_contents(meta_schema), ), ( - "http://example.com/schemas/element".to_string(), + "http://example.com/schemas/element", referencing::Resource::from_contents(element_schema.clone()), ), - ]; + ]) + .expect("Should create registry"); let validator = crate::options() .without_schema_validation() - .with_resources(resources.into_iter()) + .with_registry(&registry) .build(&element_schema) .expect("Should successfully build validator with custom meta-schema"); @@ -3338,7 +3349,7 @@ mod tests { let validator = crate::options() .without_schema_validation() - .with_registry(registry) + .with_registry(&registry) .build(&schema) .expect("Should build validator"); @@ -3375,7 +3386,7 @@ mod tests { }); assert!(crate::meta::options() - .with_registry(registry.clone()) + .with_registry(&registry) .is_valid(&valid_schema)); // Invalid schema - top-level typo "typ" instead of "type" @@ -3385,7 
+3396,7 @@ mod tests { }); assert!(!crate::meta::options() - .with_registry(registry.clone()) + .with_registry(&registry) .is_valid(&invalid_schema_top_level)); // Invalid schema - nested invalid keyword "minSize" (not a real JSON Schema keyword) @@ -3398,7 +3409,7 @@ mod tests { }); assert!(!crate::meta::options() - .with_registry(registry) + .with_registry(&registry) .is_valid(&invalid_schema_nested)); } @@ -3437,7 +3448,7 @@ mod tests { let validator = crate::options() .without_schema_validation() - .with_registry(registry) + .with_registry(&registry) .build(&schema) .expect("Should build validator"); @@ -3472,11 +3483,11 @@ mod tests { }); assert!(crate::meta::options() - .with_registry(registry.clone()) + .with_registry(&registry) .is_valid(&schema)); assert!(crate::meta::options() - .with_registry(registry) + .with_registry(&registry) .validate(&schema) .is_ok()); } @@ -3502,11 +3513,11 @@ mod tests { }); assert!(!crate::meta::options() - .with_registry(registry.clone()) + .with_registry(&registry) .is_valid(&schema)); assert!(crate::meta::options() - .with_registry(registry) + .with_registry(&registry) .validate(&schema) .is_err()); } @@ -3527,7 +3538,7 @@ mod tests { }); assert!(crate::meta::options() - .with_registry(registry) + .with_registry(&registry) .is_valid(&schema)); } @@ -3563,7 +3574,7 @@ mod tests { }); assert!(crate::meta::options() - .with_registry(registry) + .with_registry(&registry) .is_valid(&schema)); } @@ -3604,7 +3615,7 @@ mod tests { }); let result = crate::meta::options() - .with_registry(registry.clone()) + .with_registry(&registry) .validate(&schema); assert!( @@ -3614,7 +3625,7 @@ mod tests { ); assert!(crate::meta::options() - .with_registry(registry) + .with_registry(&registry) .is_valid(&schema)); } @@ -3658,7 +3669,7 @@ mod tests { }); let result = crate::meta::options() - .with_registry(registry) + .with_registry(&registry) .validate(&schema); assert!(result.is_err()); @@ -3691,7 +3702,7 @@ mod tests { }); let result = crate::meta::options() - 
.with_registry(registry) + .with_registry(&registry) .validate(&schema); assert!(result.is_err()); @@ -3738,7 +3749,7 @@ mod tests { }); let result = crate::meta::options() - .with_registry(registry) + .with_registry(&registry) .validate(&schema); assert!(result.is_err()); @@ -3770,7 +3781,7 @@ mod tests { }); let result = crate::meta::options() - .with_registry(registry) + .with_registry(&registry) .validate(&schema); assert!(result.is_ok()); @@ -3801,7 +3812,7 @@ mod tests { }); let result = crate::meta::options() - .with_registry(registry) + .with_registry(&registry) .validate(&schema_valid_for_draft6); assert!(result.is_ok()); @@ -3829,7 +3840,7 @@ mod tests { }); let result = crate::meta::options() - .with_registry(registry) + .with_registry(&registry) .validate(&schema); assert!(result.is_ok()); @@ -3862,7 +3873,7 @@ mod tests { }); let result = crate::meta::options() - .with_registry(registry) + .with_registry(&registry) .validate(&schema); assert!(result.is_ok()); @@ -3908,7 +3919,7 @@ mod tests { }); let result = crate::meta::options() - .with_registry(registry) + .with_registry(&registry) .validate(&schema); assert!(result.is_ok()); @@ -4164,7 +4175,7 @@ mod async_tests { // Create a validator using the pre-populated registry let validator = crate::async_options() - .with_registry(registry) + .with_registry(&registry) .build(&json!({ "$ref": "https://example.com/user.json" })) diff --git a/crates/jsonschema/src/node.rs b/crates/jsonschema/src/node.rs index 5481e401..3ad592c5 100644 --- a/crates/jsonschema/src/node.rs +++ b/crates/jsonschema/src/node.rs @@ -161,7 +161,10 @@ impl Validate for PendingSchemaNode { } impl SchemaNode { - pub(crate) fn from_boolean(ctx: &Context<'_>, validator: Option) -> SchemaNode { + pub(crate) fn from_boolean( + ctx: &Context<'_, '_>, + validator: Option, + ) -> SchemaNode { SchemaNode { location: ctx.location().clone(), absolute_path: ctx.base_uri(), @@ -170,7 +173,7 @@ impl SchemaNode { } pub(crate) fn from_keywords( - ctx: &Context<'_>, + ctx: 
&Context<'_, '_>, validators: Vec<(Keyword, BoxedValidator)>, unmatched_keywords: Option>, ) -> SchemaNode { @@ -197,7 +200,7 @@ impl SchemaNode { } } - pub(crate) fn from_array(ctx: &Context<'_>, validators: Vec) -> SchemaNode { + pub(crate) fn from_array(ctx: &Context<'_, '_>, validators: Vec) -> SchemaNode { let absolute_path = ctx.base_uri(); let validators = validators .into_iter() diff --git a/crates/jsonschema/src/options.rs b/crates/jsonschema/src/options.rs index e83f9102..ba6b46ea 100644 --- a/crates/jsonschema/src/options.rs +++ b/crates/jsonschema/src/options.rs @@ -12,13 +12,13 @@ use crate::{ Keyword, ValidationError, Validator, }; use ahash::AHashMap; -use referencing::{Draft, Resource, Retrieve}; +use referencing::{Draft, Retrieve}; use serde_json::Value; use std::{fmt, marker::PhantomData, sync::Arc}; /// Configuration options for JSON Schema validation. #[derive(Clone)] -pub struct ValidationOptions> { +pub struct ValidationOptions<'doc, R = Arc> { pub(crate) draft: Option, content_media_type_checks: AHashMap<&'static str, Option>, content_encoding_checks_and_converters: @@ -26,18 +26,17 @@ pub struct ValidationOptions> { pub(crate) base_uri: Option, /// Retriever for external resources pub(crate) retriever: R, - /// Additional resources that should be addressable during validation. 
- pub(crate) resources: AHashMap, - pub(crate) registry: Option, + pub(crate) registry: Option<&'doc referencing::Registry<'doc>>, formats: AHashMap>, validate_formats: Option, pub(crate) validate_schema: bool, ignore_unknown_formats: bool, keywords: AHashMap>, pattern_options: PatternEngineOptions, + _phantom: PhantomData<&'doc ()>, } -impl Default for ValidationOptions> { +impl Default for ValidationOptions<'_, Arc> { fn default() -> Self { ValidationOptions { draft: None, @@ -45,7 +44,6 @@ impl Default for ValidationOptions> { content_encoding_checks_and_converters: AHashMap::default(), base_uri: None, retriever: Arc::new(DefaultRetriever), - resources: AHashMap::default(), registry: None, formats: AHashMap::default(), validate_formats: None, @@ -53,12 +51,13 @@ impl Default for ValidationOptions> { ignore_unknown_formats: true, keywords: AHashMap::default(), pattern_options: PatternEngineOptions::default(), + _phantom: PhantomData, } } } #[cfg(feature = "resolve-async")] -impl Default for ValidationOptions> { +impl Default for ValidationOptions<'_, Arc> { fn default() -> Self { ValidationOptions { draft: None, @@ -66,7 +65,6 @@ impl Default for ValidationOptions> { content_encoding_checks_and_converters: AHashMap::default(), base_uri: None, retriever: Arc::new(DefaultRetriever), - resources: AHashMap::default(), registry: None, formats: AHashMap::default(), validate_formats: None, @@ -74,11 +72,12 @@ impl Default for ValidationOptions> { ignore_unknown_formats: true, keywords: AHashMap::default(), pattern_options: PatternEngineOptions::default(), + _phantom: PhantomData, } } } -impl ValidationOptions { +impl<'doc, R> ValidationOptions<'doc, R> { /// Return the draft version, or the default if not set. pub(crate) fn draft(&self) -> Draft { self.draft.unwrap_or_default() @@ -271,70 +270,6 @@ impl ValidationOptions { self.base_uri = Some(base_uri.into()); self } - /// Add a custom schema, allowing it to be referenced by the specified URI during validation. 
- /// - /// This enables the use of additional in-memory schemas alongside the main schema being validated. - /// - /// # Example - /// - /// ```rust - /// # use serde_json::json; - /// # fn main() -> Result<(), Box> { - /// use jsonschema::Resource; - /// - /// let extra = Resource::from_contents(json!({"minimum": 5})); - /// - /// let validator = jsonschema::options() - /// .with_resource("urn:minimum-schema", extra) - /// .build(&json!({"$ref": "urn:minimum-schema"}))?; - /// assert!(validator.is_valid(&json!(5))); - /// assert!(!validator.is_valid(&json!(4))); - /// # Ok(()) - /// # } - /// ``` - #[must_use] - pub fn with_resource(mut self, uri: impl Into, resource: Resource) -> Self { - self.resources.insert(uri.into(), resource); - self - } - /// Add custom schemas, allowing them to be referenced by the specified URI during validation. - /// - /// This enables the use of additional in-memory schemas alongside the main schema being validated. - /// - /// # Example - /// - /// ```rust - /// # use serde_json::json; - /// # fn main() -> Result<(), Box> { - /// use jsonschema::Resource; - /// - /// let validator = jsonschema::options() - /// .with_resources([ - /// ( - /// "urn:minimum-schema", - /// Resource::from_contents(json!({"minimum": 5})), - /// ), - /// ( - /// "urn:maximum-schema", - /// Resource::from_contents(json!({"maximum": 10})), - /// ), - /// ].into_iter()) - /// .build(&json!({"$ref": "urn:minimum-schema"}))?; - /// assert!(validator.is_valid(&json!(5))); - /// assert!(!validator.is_valid(&json!(4))); - /// # Ok(()) - /// # } - /// ``` - #[must_use] - pub fn with_resources( - mut self, - pairs: impl Iterator, Resource)>, - ) -> Self { - for (uri, resource) in pairs { - self.resources.insert(uri.into(), resource); - } - self - } /// Use external schema resources from the registry, making them accessible via references /// during validation. 
/// @@ -355,7 +290,7 @@ impl ValidationOptions { /// } /// }); /// let validator = jsonschema::options() - /// .with_registry(registry) + /// .with_registry(&registry) /// .build(&schema)?; /// assert!(validator.is_valid(&json!({ "name": "Valid String" }))); /// assert!(!validator.is_valid(&json!({ "name": 123 }))); @@ -363,7 +298,7 @@ impl ValidationOptions { /// # } /// ``` #[must_use] - pub fn with_registry(mut self, registry: referencing::Registry) -> Self { + pub fn with_registry(mut self, registry: &'doc referencing::Registry<'doc>) -> Self { self.registry = Some(registry); self } @@ -510,32 +445,12 @@ impl ValidationOptions { pub(crate) fn get_keyword_factory(&self, name: &str) -> Option<&Arc> { self.keywords.get(name) } -} -impl ValidationOptions> { - /// Build a JSON Schema validator using the current options. - /// - /// # Example - /// - /// ```rust - /// use serde_json::json; - /// - /// let schema = json!({"type": "string"}); - /// let validator = jsonschema::options() - /// .build(&schema) - /// .expect("A valid schema"); - /// - /// assert!(validator.is_valid(&json!("Hello"))); - /// assert!(!validator.is_valid(&json!(42))); - /// ``` - /// - /// # Errors - /// - /// Returns an error if `schema` is invalid for the selected draft or if referenced resources - /// cannot be retrieved or resolved. 
- pub fn build(&self, schema: &Value) -> Result> { - compiler::build_validator(self, schema) + pub(crate) const fn pattern_options(&self) -> PatternEngineOptions { + self.pattern_options } + + #[cfg_attr(not(feature = "resolve-async"), allow(dead_code))] pub(crate) fn draft_for(&self, contents: &Value) -> Result> { // Preference: // - Explicitly set @@ -565,23 +480,38 @@ impl ValidationOptions> { } } - fn resolve_draft_from_registry( + pub(crate) fn resolve_draft_from_registry( uri: &str, - registry: &referencing::Registry, + registry: &referencing::Registry<'_>, ) -> Result> { let uri = uri.trim_end_matches('#'); crate::meta::walk_meta_schema_chain(uri, |current_uri| { - let resolver = registry.try_resolver(current_uri)?; + let context = registry.context(); + let resolver = context.try_resolver(current_uri)?; let resolved = resolver.lookup("")?; Ok(resolved.contents().clone()) }) } +} + +impl ValidationOptions<'_, Arc> { + /// Build a JSON Schema validator using the current options. + /// + /// # Errors + /// + /// Returns an error if `schema` is invalid for the selected draft or if referenced resources + /// cannot be retrieved or resolved. + pub fn build(&self, schema: &Value) -> Result> { + compiler::build_validator(self, schema) + } + /// Set a retriever to fetch external resources. #[must_use] pub fn with_retriever(mut self, retriever: impl Retrieve + 'static) -> Self { self.retriever = Arc::new(retriever); self } + /// Configure the regular expression engine used during validation for keywords like `pattern` /// or `patternProperties`. /// @@ -613,13 +543,10 @@ impl ValidationOptions> { self.pattern_options = options.inner; self } - pub(crate) fn pattern_options(&self) -> PatternEngineOptions { - self.pattern_options - } } #[cfg(feature = "resolve-async")] -impl ValidationOptions> { +impl<'doc> ValidationOptions<'doc, Arc> { /// Build a JSON Schema validator using the current async options. 
/// /// # Errors @@ -633,14 +560,13 @@ impl ValidationOptions> { pub fn with_retriever( self, retriever: impl referencing::AsyncRetrieve + 'static, - ) -> ValidationOptions> { + ) -> ValidationOptions<'doc, Arc> { ValidationOptions { draft: self.draft, retriever: Arc::new(retriever), content_media_type_checks: self.content_media_type_checks, content_encoding_checks_and_converters: self.content_encoding_checks_and_converters, base_uri: None, - resources: self.resources, registry: self.registry, formats: self.formats, validate_formats: self.validate_formats, @@ -648,36 +574,27 @@ impl ValidationOptions> { ignore_unknown_formats: self.ignore_unknown_formats, keywords: self.keywords, pattern_options: self.pattern_options, + _phantom: PhantomData, } } #[allow(clippy::unused_async)] - pub(crate) async fn draft_for( + pub(crate) async fn draft_for_async( &self, contents: &Value, ) -> Result> { - // Preference: - // - Explicitly set - // - Autodetected - // - Default - if let Some(draft) = self.draft { - Ok(draft) - } else { - let default = Draft::default(); - Ok(default.detect(contents)) - } + self.draft_for(contents) } /// Set a retriever to fetch external resources. 
pub(crate) fn with_blocking_retriever( self, retriever: impl Retrieve + 'static, - ) -> ValidationOptions> { + ) -> ValidationOptions<'doc, Arc> { ValidationOptions { draft: self.draft, retriever: Arc::new(retriever), content_media_type_checks: self.content_media_type_checks, content_encoding_checks_and_converters: self.content_encoding_checks_and_converters, base_uri: None, - resources: self.resources, registry: self.registry, formats: self.formats, validate_formats: self.validate_formats, @@ -685,11 +602,12 @@ impl ValidationOptions> { ignore_unknown_formats: self.ignore_unknown_formats, keywords: self.keywords, pattern_options: self.pattern_options, + _phantom: PhantomData, } } } -impl fmt::Debug for ValidationOptions { +impl fmt::Debug for ValidationOptions<'_, R> { fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { fmt.debug_struct("CompilationConfig") .field("draft", &self.draft) @@ -864,7 +782,7 @@ mod tests { } }); let validator = crate::options() - .with_registry(registry) + .with_registry(&registry) .build(&schema) .expect("Invalid schema"); assert!(validator.is_valid(&json!({ "name": "Valid String" }))); @@ -957,12 +875,15 @@ mod tests { // Create a schema that references the specific definition let user_schema = json!({"$ref": "https://example.com/root#/definitions/User"}); - // Build validator with the root schema registered as a resource + let registry = referencing::Registry::builder() + .with_document("https://example.com/root", root_schema) + .expect("Failed to register root schema") + .build() + .expect("Failed to build registry"); + + // Build validator with the root schema registered in the registry let validator = crate::options() - .with_resource( - "https://example.com/root", - Resource::from_contents(root_schema), - ) + .with_registry(&registry) .build(&user_schema) .expect("Valid schema"); diff --git a/crates/jsonschema/src/retriever.rs b/crates/jsonschema/src/retriever.rs index a9023598..226949e4 100644 --- 
a/crates/jsonschema/src/retriever.rs +++ b/crates/jsonschema/src/retriever.rs @@ -333,7 +333,7 @@ mod async_tests { .expect("Registry creation failed"); let validator = crate::options() - .with_registry(registry) + .with_registry(&registry) .build(&schema) .expect("Invalid schema"); @@ -406,7 +406,7 @@ mod async_tests { .expect("Registry creation failed"); let validator = crate::options() - .with_registry(registry) + .with_registry(&registry) .build(&schema) .expect("Invalid schema"); diff --git a/crates/jsonschema/src/validator.rs b/crates/jsonschema/src/validator.rs index 5259fc04..e377643a 100644 --- a/crates/jsonschema/src/validator.rs +++ b/crates/jsonschema/src/validator.rs @@ -213,7 +213,7 @@ impl Validator { /// .build(&schema); /// ``` #[must_use] - pub fn options() -> ValidationOptions { + pub fn options<'doc>() -> ValidationOptions<'doc> { ValidationOptions::default() } /// Create a default [`ValidationOptions`] configured for async validation. @@ -242,7 +242,8 @@ impl Validator { /// For sync validation, use [`options()`](crate::options()) instead. #[cfg(feature = "resolve-async")] #[must_use] - pub fn async_options() -> ValidationOptions> { + pub fn async_options<'doc>( + ) -> ValidationOptions<'doc, std::sync::Arc> { ValidationOptions::default() } /// Create a validator using the default options. diff --git a/fuzz/fuzz_targets/referencing.rs b/fuzz/fuzz_targets/referencing.rs index 863fd1f8..4e5bcc3f 100644 --- a/fuzz/fuzz_targets/referencing.rs +++ b/fuzz/fuzz_targets/referencing.rs @@ -16,8 +16,9 @@ fuzz_target!(|data: (&[u8], &[u8], &[u8])| { ] { let resource = draft.create_resource(schema.clone()); if let Ok(registry) = Registry::try_new(base, resource) { + let context = registry.context(); if let Ok(resolver) = - registry.try_resolver("http://example.com/schema.json") + context.try_resolver("http://example.com/schema.json") { let _resolved = resolver.lookup(reference); }