Skip to content

Commit a55f18c

Browse files
committed
fix: Use-after-free in async $ref resolution when multiple refs target the same external URL with different fragments
Signed-off-by: Dmitry Dygalo <[email protected]>
1 parent f4bdaa0 commit a55f18c

File tree

2 files changed

+144
-28
lines changed

2 files changed

+144
-28
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88

99
### Fixed
1010

11+
- Use-after-free in async `$ref` resolution when multiple refs target the same external URL with different fragments. [#906](https://github.com/Stranger6667/jsonschema/issues/906)
1112
- `multipleOf` validation for large u64 values beyond `i64::MAX` with `arbitrary-precision` feature.
1213
- `Validator` not being `Send + Sync` on WASM targets. [#915](https://github.com/Stranger6667/jsonschema/issues/915)
1314

crates/jsonschema-referencing/src/registry.rs

Lines changed: 143 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -897,6 +897,8 @@ async fn process_resources_async(
897897
resolution_cache: &mut UriCache,
898898
default_draft: Draft,
899899
) -> Result<Vec<Arc<Uri<String>>>, Error> {
900+
type ExternalRefsByBase = AHashMap<Uri<String>, Vec<(String, Uri<String>, ReferenceKind)>>;
901+
900902
let mut state = ProcessingState::new();
901903
process_input_resources(pairs, documents, resources, &mut state)?;
902904

@@ -908,52 +910,63 @@ async fn process_resources_async(
908910
process_queue(&mut state, resources, anchors, resolution_cache)?;
909911

910912
if !state.external.is_empty() {
911-
let data = state
912-
.external
913-
.drain()
914-
.filter_map(|(original, uri, kind)| {
915-
let mut fragmentless = uri.clone();
916-
fragmentless.set_fragment(None);
917-
if resources.contains_key(&fragmentless) {
918-
None
919-
} else {
920-
Some((original, uri, kind, fragmentless))
921-
}
922-
})
923-
.collect::<Vec<_>>();
913+
// Group external refs by fragmentless URI to avoid fetching the same resource multiple times.
914+
// Multiple refs may point to the same base URL with different fragments (e.g., #/$defs/foo and #/$defs/bar).
915+
// We need to fetch each unique base URL only once, then handle all fragment refs against it.
916+
let mut grouped = ExternalRefsByBase::new();
917+
for (original, uri, kind) in state.external.drain() {
918+
let mut fragmentless = uri.clone();
919+
fragmentless.set_fragment(None);
920+
if !resources.contains_key(&fragmentless) {
921+
grouped
922+
.entry(fragmentless)
923+
.or_default()
924+
.push((original, uri, kind));
925+
}
926+
}
924927

928+
// Fetch each unique fragmentless URI once
929+
let entries: Vec<_> = grouped.into_iter().collect();
925930
let results = {
926-
let futures = data
931+
let futures = entries
927932
.iter()
928-
.map(|(_, _, _, fragmentless)| retriever.retrieve(fragmentless));
933+
.map(|(fragmentless, _)| retriever.retrieve(fragmentless));
929934
futures::future::join_all(futures).await
930935
};
931936

932-
for ((original, uri, kind, fragmentless), result) in data.iter().zip(results) {
937+
for ((fragmentless, refs), result) in entries.into_iter().zip(results) {
933938
let retrieved = match result {
934939
Ok(retrieved) => retrieved,
935940
Err(error) => {
936-
handle_retrieve_error(uri, original, fragmentless, error, *kind)?;
941+
// Report error for the first ref that caused this fetch
942+
if let Some((original, uri, kind)) = refs.into_iter().next() {
943+
handle_retrieve_error(&uri, &original, &fragmentless, error, kind)?;
944+
}
937945
continue;
938946
}
939947
};
940948

941949
let (key, resource) = create_resource(
942950
retrieved,
943-
fragmentless.clone(),
951+
fragmentless,
944952
default_draft,
945953
documents,
946954
resources,
947955
&mut state.custom_metaschemas,
948956
);
949-
handle_fragment(
950-
uri,
951-
&resource,
952-
&key,
953-
default_draft,
954-
&mut state.queue,
955-
Arc::clone(&key),
956-
);
957+
958+
// Handle all fragment refs that pointed to this base URL
959+
for (_, uri, _) in &refs {
960+
handle_fragment(
961+
uri,
962+
&resource,
963+
&key,
964+
default_draft,
965+
&mut state.queue,
966+
Arc::clone(&key),
967+
);
968+
}
969+
957970
state.queue.push_back((key, resource, None));
958971
}
959972
}
@@ -1815,7 +1828,6 @@ mod tests {
18151828

18161829
#[test]
18171830
fn test_invalid_reference() {
1818-
// Found via fuzzing
18191831
let resource = Draft::Draft202012.create_resource(json!({"$schema": "$##"}));
18201832
let _ = Registry::try_new("http://#/", resource);
18211833
}
@@ -1826,7 +1838,10 @@ mod async_tests {
18261838
use crate::{uri, DefaultRetriever, Draft, Registry, Resource, Uri};
18271839
use ahash::AHashMap;
18281840
use serde_json::{json, Value};
1829-
use std::error::Error;
1841+
use std::{
1842+
error::Error,
1843+
sync::atomic::{AtomicUsize, Ordering},
1844+
};
18301845

18311846
struct TestAsyncRetriever {
18321847
schemas: AHashMap<String, Value>,
@@ -2053,4 +2068,104 @@ mod async_tests {
20532068
&json!({"type": "string", "minLength": 1})
20542069
);
20552070
}
2071+
2072+
// Multiple refs to the same external schema with different fragments were fetched multiple times in async mode.
2073+
#[tokio::test]
2074+
async fn test_async_registry_with_duplicate_fragment_refs() {
2075+
static FETCH_COUNT: AtomicUsize = AtomicUsize::new(0);
2076+
2077+
struct CountingRetriever {
2078+
inner: TestAsyncRetriever,
2079+
}
2080+
2081+
#[cfg_attr(target_family = "wasm", async_trait::async_trait(?Send))]
2082+
#[cfg_attr(not(target_family = "wasm"), async_trait::async_trait)]
2083+
impl crate::AsyncRetrieve for CountingRetriever {
2084+
async fn retrieve(
2085+
&self,
2086+
uri: &Uri<String>,
2087+
) -> Result<Value, Box<dyn std::error::Error + Send + Sync>> {
2088+
FETCH_COUNT.fetch_add(1, Ordering::SeqCst);
2089+
self.inner.retrieve(uri).await
2090+
}
2091+
}
2092+
2093+
FETCH_COUNT.store(0, Ordering::SeqCst);
2094+
2095+
let retriever = CountingRetriever {
2096+
inner: TestAsyncRetriever::with_schema(
2097+
"http://example.com/external",
2098+
json!({
2099+
"$defs": {
2100+
"foo": {
2101+
"type": "object",
2102+
"properties": {
2103+
"nested": { "type": "string" }
2104+
}
2105+
},
2106+
"bar": {
2107+
"type": "object",
2108+
"properties": {
2109+
"value": { "type": "integer" }
2110+
}
2111+
}
2112+
}
2113+
}),
2114+
),
2115+
};
2116+
2117+
// Schema references the same external URL with different fragments
2118+
let registry = Registry::options()
2119+
.async_retriever(retriever)
2120+
.build([(
2121+
"http://example.com/main",
2122+
Resource::from_contents(json!({
2123+
"type": "object",
2124+
"properties": {
2125+
"name": { "$ref": "http://example.com/external#/$defs/foo" },
2126+
"age": { "$ref": "http://example.com/external#/$defs/bar" }
2127+
}
2128+
})),
2129+
)])
2130+
.await
2131+
.expect("Invalid resource");
2132+
2133+
// Should only fetch the external schema once
2134+
let fetches = FETCH_COUNT.load(Ordering::SeqCst);
2135+
assert_eq!(
2136+
fetches, 1,
2137+
"External schema should be fetched only once, but was fetched {fetches} times"
2138+
);
2139+
2140+
let resolver = registry
2141+
.try_resolver("http://example.com/main")
2142+
.expect("Invalid base URI");
2143+
2144+
// Verify both fragment references resolve correctly
2145+
let foo = resolver
2146+
.lookup("http://example.com/external#/$defs/foo")
2147+
.expect("Lookup failed");
2148+
assert_eq!(
2149+
foo.contents(),
2150+
&json!({
2151+
"type": "object",
2152+
"properties": {
2153+
"nested": { "type": "string" }
2154+
}
2155+
})
2156+
);
2157+
2158+
let bar = resolver
2159+
.lookup("http://example.com/external#/$defs/bar")
2160+
.expect("Lookup failed");
2161+
assert_eq!(
2162+
bar.contents(),
2163+
&json!({
2164+
"type": "object",
2165+
"properties": {
2166+
"value": { "type": "integer" }
2167+
}
2168+
})
2169+
);
2170+
}
20562171
}

0 commit comments

Comments
 (0)