Skip to content

Commit 969c63b

Browse files
committed
wip
Signed-off-by: Dmitry Dygalo <dmitry@dygalo.dev>
1 parent cc58ff5 commit 969c63b

File tree

1 file changed

+36
-57
lines changed

1 file changed

+36
-57
lines changed

crates/jsonschema-referencing/src/registry.rs

Lines changed: 36 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -983,6 +983,15 @@ enum ReferenceKind {
983983
/// Local `$ref`s are always resolved against the document root.
984984
type QueueEntry = (Arc<Uri<String>>, Arc<Uri<String>>, String, Draft);
985985

986+
/// A deferred local `$ref` target.
987+
///
988+
/// Like [`QueueEntry`], but also carries `value_addr`: the address of the target `Value`,
989+
/// captured from the `pointer()` lookup done at push time. [`process_deferred_refs`] checks it
990+
/// against the visited set to skip already-visited targets without a second `pointer()` traversal.
991+
///
992+
/// `(base_uri, document_root_uri, pointer, draft, value_addr)`
993+
type DeferredRef = (Arc<Uri<String>>, Arc<Uri<String>>, String, Draft, usize);
994+
986995
fn insert_borrowed_anchor_entries<'a>(
987996
index_data: &mut PreparedIndex<'a>,
988997
uri: &Arc<Uri<String>>,
@@ -1098,6 +1107,8 @@ fn insert_owned_discovered_index_entries<'a>(
10981107
struct ProcessingState<'a> {
10991108
queue: VecDeque<QueueEntry>,
11001109
seen: ReferenceTracker,
1110+
// The String is the original reference text (e.g. "./foo.json"), kept solely for
1111+
// `json-schema://`-scheme error messages where the resolved URI is not user-friendly.
11011112
external: AHashSet<(String, Uri<String>, ReferenceKind)>,
11021113
scratch: String,
11031114
refers_metaschemas: bool,
@@ -1108,9 +1119,10 @@ struct ProcessingState<'a> {
11081119
/// Deferred local-ref targets. During the main traversal, instead of calling
11091120
/// `collect_external_resources_recursive` immediately when a local `$ref` is found,
11101121
/// the target is pushed here. After `process_queue` completes (full document traversal),
1111-
/// subresource targets are already in `visited_schemas` and return in O(1);
1112-
/// non-subresource paths (e.g. `#/components/schemas/Foo`) are still fully traversed.
1113-
deferred_refs: Vec<QueueEntry>,
1122+
/// subresource targets are already in `visited_schemas` and skipped in O(1) via the
1123+
/// pre-stored value address; non-subresource paths (e.g. `#/components/schemas/Foo`)
1124+
/// are still fully traversed.
1125+
deferred_refs: Vec<DeferredRef>,
11141126
index_data: PreparedIndex<'a>,
11151127
}
11161128

@@ -1717,6 +1729,8 @@ async fn run_async_processing_loop<'a>(
17171729

17181730
fn handle_retrieve_error(
17191731
uri: &Uri<String>,
1732+
// The original reference string is used in error messages for `json-schema://` URIs
1733+
// where the resolved URI is not user-friendly (e.g. "./foo.json" vs "json-schema:///foo.json").
17201734
original: &str,
17211735
fragmentless: &Uri<String>,
17221736
error: Box<dyn std::error::Error + Send + Sync>,
@@ -1768,7 +1782,7 @@ fn collect_external_resources<'doc>(
17681782
refers_metaschemas: &mut bool,
17691783
draft: Draft,
17701784
doc_key: &Arc<Uri<String>>,
1771-
deferred_refs: &mut Vec<QueueEntry>,
1785+
deferred_refs: &mut Vec<DeferredRef>,
17721786
local_seen: &mut LocalSeen<'doc>,
17731787
) -> Result<(), Error> {
17741788
if base.scheme().as_str() == "urn" {
@@ -1787,19 +1801,16 @@ fn collect_external_resources<'doc>(
17871801
} else if $reference != "#" {
17881802
if $reference.starts_with('#') {
17891803
if mark_local_reference(local_seen, base, $reference) {
1790-
if let Some((referenced, resolved_base)) = pointer_with_base(
1791-
root,
1792-
$reference.trim_start_matches('#'),
1793-
base,
1794-
resolution_cache,
1795-
draft,
1796-
)? {
1804+
let ptr = $reference.trim_start_matches('#');
1805+
if let Some(referenced) = pointer(root, ptr) {
17971806
let target_draft = draft.detect(referenced);
1807+
let value_addr = std::ptr::from_ref::<Value>(referenced) as usize;
17981808
deferred_refs.push((
1799-
resolved_base,
1809+
Arc::clone(base),
18001810
Arc::clone(doc_key),
1801-
$reference.trim_start_matches('#').to_string(),
1811+
ptr.to_string(),
18021812
target_draft,
1813+
value_addr,
18031814
));
18041815
}
18051816
}
@@ -1879,7 +1890,7 @@ fn collect_external_resources_recursive<'doc>(
18791890
draft: Draft,
18801891
visited: &mut AHashSet<usize>,
18811892
doc_key: &Arc<Uri<String>>,
1882-
deferred_refs: &mut Vec<QueueEntry>,
1893+
deferred_refs: &mut Vec<DeferredRef>,
18831894
local_seen: &mut LocalSeen<'doc>,
18841895
) -> Result<(), Error> {
18851896
let ptr = std::ptr::from_ref::<Value>(contents) as usize;
@@ -1931,7 +1942,9 @@ fn collect_external_resources_recursive<'doc>(
19311942
/// Process deferred local-ref targets collected during the main traversal.
19321943
///
19331944
/// Called after `process_queue` finishes so that all subresource nodes are already in
1934-
/// `visited_schemas`. Subresource targets return in O(1); non-subresource targets
1945+
/// `visited_schemas`. Targets that were visited by the main BFS (e.g. `#/definitions/Foo`
1946+
/// under a JSON Schema keyword) are skipped in O(1) via the pre-stored value address,
1947+
/// avoiding a redundant `pointer()` traversal. Non-subresource targets
19351948
/// (e.g. `#/components/schemas/Foo`) are still fully traversed. New deferred entries
19361949
/// added during traversal are also processed iteratively until none remain.
19371950
fn process_deferred_refs<'a>(
@@ -1942,7 +1955,14 @@ fn process_deferred_refs<'a>(
19421955
) -> Result<(), Error> {
19431956
while !state.deferred_refs.is_empty() {
19441957
let batch = std::mem::take(&mut state.deferred_refs);
1945-
for (base, doc_key, pointer_path, draft) in batch {
1958+
for (base, doc_key, pointer_path, draft, value_addr) in batch {
1959+
// Fast path: if this target was already visited by the main BFS traversal
1960+
// (e.g. a `#/definitions/Foo` that `walk_subresources_with_path` descended into),
1961+
// its subresources were already processed and `collect_external_resources` has already
1962+
// run on each of them, so skip it without a redundant `pointer()` traversal.
1963+
if state.visited_schemas.contains(&value_addr) {
1964+
continue;
1965+
}
19461966
let Some(document) = documents.get(&doc_key) else {
19471967
continue;
19481968
};
@@ -2023,47 +2043,6 @@ pub fn pointer<'a>(document: &'a Value, pointer: &str) -> Option<&'a Value> {
20232043
)
20242044
}
20252045

2026-
#[allow(clippy::type_complexity)]
2027-
fn pointer_with_base<'a>(
2028-
document: &'a Value,
2029-
pointer: &str,
2030-
base: &Arc<Uri<String>>,
2031-
resolution_cache: &mut UriCache,
2032-
draft: Draft,
2033-
) -> Result<Option<(&'a Value, Arc<Uri<String>>)>, Error> {
2034-
if pointer.is_empty() {
2035-
return Ok(Some((document, Arc::clone(base))));
2036-
}
2037-
if !pointer.starts_with('/') {
2038-
return Ok(None);
2039-
}
2040-
2041-
let mut current = document;
2042-
let mut current_base = Arc::clone(base);
2043-
let mut current_draft = draft;
2044-
2045-
for token in pointer.split('/').skip(1).map(unescape_segment) {
2046-
current_draft = current_draft.detect(current);
2047-
if let Some(id) = current_draft.id_of(current) {
2048-
current_base = resolve_id(&current_base, id, resolution_cache)?;
2049-
}
2050-
2051-
current = match current {
2052-
Value::Object(map) => match map.get(&*token) {
2053-
Some(v) => v,
2054-
None => return Ok(None),
2055-
},
2056-
Value::Array(list) => match parse_index(&token).and_then(|x| list.get(x)) {
2057-
Some(v) => v,
2058-
None => return Ok(None),
2059-
},
2060-
_ => return Ok(None),
2061-
};
2062-
}
2063-
2064-
Ok(Some((current, current_base)))
2065-
}
2066-
20672046
// Taken from `serde_json`.
20682047
#[must_use]
20692048
pub fn parse_index(s: &str) -> Option<usize> {

0 commit comments

Comments
 (0)