@@ -6,7 +6,7 @@ use std::{
66} ;
77
88use ahash:: { AHashMap , AHashSet } ;
9- use fluent_uri:: Uri ;
9+ use fluent_uri:: { pct_enc :: EStr , Uri } ;
1010use serde_json:: Value ;
1111
1212use crate :: {
@@ -653,13 +653,22 @@ enum ReferenceKind {
653653 Schema ,
654654}
655655
656+ /// An entry in the processing queue.
657+ /// The optional third element is the document root URI, used when the resource
658+ /// was extracted from a fragment of a larger document. Local `$ref`s need to be
659+ /// resolved against the document root, not just the fragment content.
660+ type QueueEntry = ( Arc < Uri < String > > , InnerResourcePtr , Option < Arc < Uri < String > > > ) ;
661+
656662struct ProcessingState {
657- queue : VecDeque < ( Arc < Uri < String > > , InnerResourcePtr ) > ,
663+ queue : VecDeque < QueueEntry > ,
658664 seen : ReferenceTracker ,
659665 external : AHashSet < ( String , Uri < String > , ReferenceKind ) > ,
660666 scratch : String ,
661667 refers_metaschemas : bool ,
662668 custom_metaschemas : Vec < Arc < Uri < String > > > ,
669+ /// Tracks schema pointers we've visited during recursive external resource collection.
670+ /// This prevents infinite recursion when schemas reference each other.
671+ visited_schemas : AHashSet < usize > ,
663672}
664673
665674impl ProcessingState {
@@ -671,6 +680,7 @@ impl ProcessingState {
671680 scratch : String :: new ( ) ,
672681 refers_metaschemas : false ,
673682 custom_metaschemas : Vec :: new ( ) ,
683+ visited_schemas : AHashSet :: new ( ) ,
674684 }
675685 }
676686}
@@ -697,7 +707,7 @@ fn process_input_resources(
697707 state. custom_metaschemas . push ( Arc :: clone ( & key) ) ;
698708 }
699709
700- state. queue . push_back ( ( key, resource) ) ;
710+ state. queue . push_back ( ( key, resource, None ) ) ;
701711 entry. insert ( wrapped_value) ;
702712 }
703713 }
@@ -711,31 +721,52 @@ fn process_queue(
711721 anchors : & mut AHashMap < AnchorKey , Anchor > ,
712722 resolution_cache : & mut UriCache ,
713723) -> Result < ( ) , Error > {
714- while let Some ( ( mut base, resource) ) = state. queue . pop_front ( ) {
724+ while let Some ( ( mut base, resource, document_root_uri ) ) = state. queue . pop_front ( ) {
715725 if let Some ( id) = resource. id ( ) {
716- base = resolution_cache . resolve_against ( & base. borrow ( ) , id) ?;
726+ base = resolve_id ( & base, id, resolution_cache ) ?;
717727 resources. insert ( base. clone ( ) , resource. clone ( ) ) ;
718728 }
719729
720730 for anchor in resource. anchors ( ) {
721731 anchors. insert ( AnchorKey :: new ( base. clone ( ) , anchor. name ( ) ) , anchor) ;
722732 }
723733
724- collect_external_resources (
725- & base,
726- resource. contents ( ) ,
727- resource. contents ( ) ,
728- & mut state. external ,
729- & mut state. seen ,
730- resolution_cache,
731- & mut state. scratch ,
732- & mut state. refers_metaschemas ,
733- resource. draft ( ) ,
734- ) ?;
734+ // Determine the document root for resolving local $refs.
735+ // If document_root_uri is set (e.g., for fragment-extracted resources),
736+ // look up the full document. Otherwise, this resource IS the document root.
737+ let root = document_root_uri
738+ . as_ref ( )
739+ . and_then ( |uri| resources. get ( uri) )
740+ . map_or_else ( || resource. contents ( ) , InnerResourcePtr :: contents) ;
741+
742+ // Skip if already visited during local $ref resolution
743+ let contents_ptr = std:: ptr:: from_ref :: < Value > ( resource. contents ( ) ) as usize ;
744+ if state. visited_schemas . insert ( contents_ptr) {
745+ collect_external_resources (
746+ & base,
747+ root,
748+ resource. contents ( ) ,
749+ & mut state. external ,
750+ & mut state. seen ,
751+ resolution_cache,
752+ & mut state. scratch ,
753+ & mut state. refers_metaschemas ,
754+ resource. draft ( ) ,
755+ & mut state. visited_schemas ,
756+ ) ?;
757+ }
735758
759+ // Subresources inherit the document root URI, or use the current base if none set
760+ let subresource_root_uri = document_root_uri. or_else ( || Some ( base. clone ( ) ) ) ;
736761 for contents in resource. draft ( ) . subresources_of ( resource. contents ( ) ) {
737- let subresource = InnerResourcePtr :: new ( contents, resource. draft ( ) ) ;
738- state. queue . push_back ( ( base. clone ( ) , subresource) ) ;
762+ // Skip subresources already visited during local $ref resolution
763+ let sub_ptr = std:: ptr:: from_ref :: < Value > ( contents) as usize ;
764+ if !state. visited_schemas . contains ( & sub_ptr) {
765+ let subresource = InnerResourcePtr :: new ( contents, resource. draft ( ) ) ;
766+ state
767+ . queue
768+ . push_back ( ( base. clone ( ) , subresource, subresource_root_uri. clone ( ) ) ) ;
769+ }
739770 }
740771 }
741772 Ok ( ( ) )
@@ -746,14 +777,15 @@ fn handle_fragment(
746777 resource : & InnerResourcePtr ,
747778 key : & Arc < Uri < String > > ,
748779 default_draft : Draft ,
749- queue : & mut VecDeque < ( Arc < Uri < String > > , InnerResourcePtr ) > ,
780+ queue : & mut VecDeque < QueueEntry > ,
781+ document_root_uri : Arc < Uri < String > > ,
750782) {
751783 if let Some ( fragment) = uri. fragment ( ) {
752784 if let Some ( resolved) = pointer ( resource. contents ( ) , fragment. as_str ( ) ) {
753785 let draft = default_draft. detect ( resolved) ;
754786 let contents = std:: ptr:: addr_of!( * resolved) ;
755787 let resource = InnerResourcePtr :: new ( contents, draft) ;
756- queue. push_back ( ( Arc :: clone ( key) , resource) ) ;
788+ queue. push_back ( ( Arc :: clone ( key) , resource, Some ( document_root_uri ) ) ) ;
757789 }
758790 }
759791}
@@ -841,8 +873,15 @@ fn process_resources(
841873 resources,
842874 & mut state. custom_metaschemas ,
843875 ) ;
844- handle_fragment ( & uri, & resource, & key, default_draft, & mut state. queue ) ;
845- state. queue . push_back ( ( key, resource) ) ;
876+ handle_fragment (
877+ & uri,
878+ & resource,
879+ & key,
880+ default_draft,
881+ & mut state. queue ,
882+ Arc :: clone ( & key) ,
883+ ) ;
884+ state. queue . push_back ( ( key, resource, None ) ) ;
846885 }
847886 }
848887 }
@@ -911,8 +950,15 @@ async fn process_resources_async(
911950 resources,
912951 & mut state. custom_metaschemas ,
913952 ) ;
914- handle_fragment ( uri, & resource, & key, default_draft, & mut state. queue ) ;
915- state. queue . push_back ( ( key, resource) ) ;
953+ handle_fragment (
954+ uri,
955+ & resource,
956+ & key,
957+ default_draft,
958+ & mut state. queue ,
959+ Arc :: clone ( & key) ,
960+ ) ;
961+ state. queue . push_back ( ( key, resource, None ) ) ;
916962 }
917963 }
918964 }
@@ -992,6 +1038,7 @@ fn collect_external_resources(
9921038 scratch : & mut String ,
9931039 refers_metaschemas : & mut bool ,
9941040 draft : Draft ,
1041+ visited : & mut AHashSet < usize > ,
9951042) -> Result < ( ) , Error > {
9961043 // URN schemes are not supported for external resolution
9971044 if base. scheme ( ) . as_str ( ) == "urn" {
@@ -1013,13 +1060,18 @@ fn collect_external_resources(
10131060 // Handle local references separately as they may have nested references to external resources
10141061 if $reference. starts_with( '#' ) {
10151062 // Use the root document for pointer resolution since local refs are always
1016- // relative to the document root, not the current subschema
1017- if let Some ( referenced) =
1018- pointer( root, $reference. trim_start_matches( '#' ) )
1019- {
1063+ // relative to the document root, not the current subschema.
1064+ // Also track $id changes along the path to get the correct base URI.
1065+ if let Some ( ( referenced, resolved_base) ) = pointer_with_base(
1066+ root,
1067+ $reference. trim_start_matches( '#' ) ,
1068+ base,
1069+ resolution_cache,
1070+ draft,
1071+ ) ? {
10201072 // Recursively collect from the referenced schema and all its subresources
10211073 collect_external_resources_recursive(
1022- base ,
1074+ & resolved_base ,
10231075 root,
10241076 referenced,
10251077 collected,
@@ -1028,6 +1080,7 @@ fn collect_external_resources(
10281080 scratch,
10291081 refers_metaschemas,
10301082 draft,
1083+ visited,
10311084 ) ?;
10321085 }
10331086 } else {
@@ -1102,6 +1155,9 @@ fn collect_external_resources(
11021155}
11031156
11041157/// Recursively collect external resources from a schema and all its subresources.
1158+ ///
1159+ /// The `visited` set tracks schema pointers we've already processed to avoid infinite
1160+ /// recursion when schemas reference each other (directly or through subresources).
11051161fn collect_external_resources_recursive (
11061162 base : & Arc < Uri < String > > ,
11071163 root : & Value ,
@@ -1112,10 +1168,22 @@ fn collect_external_resources_recursive(
11121168 scratch : & mut String ,
11131169 refers_metaschemas : & mut bool ,
11141170 draft : Draft ,
1171+ visited : & mut AHashSet < usize > ,
11151172) -> Result < ( ) , Error > {
1173+ // Track by pointer address to avoid processing the same schema twice
1174+ let ptr = std:: ptr:: from_ref :: < Value > ( contents) as usize ;
1175+ if !visited. insert ( ptr) {
1176+ return Ok ( ( ) ) ;
1177+ }
1178+
1179+ let current_base = match draft. id_of ( contents) {
1180+ Some ( id) => resolve_id ( base, id, resolution_cache) ?,
1181+ None => Arc :: clone ( base) ,
1182+ } ;
1183+
11161184 // First, collect from the current schema
11171185 collect_external_resources (
1118- base ,
1186+ & current_base ,
11191187 root,
11201188 contents,
11211189 collected,
@@ -1124,21 +1192,26 @@ fn collect_external_resources_recursive(
11241192 scratch,
11251193 refers_metaschemas,
11261194 draft,
1195+ visited,
11271196 ) ?;
11281197
1129- // Then recursively process all subresources
1198+ // Then recursively process all subresources (skip already-visited ones early)
11301199 for subresource in draft. subresources_of ( contents) {
1131- collect_external_resources_recursive (
1132- base,
1133- root,
1134- subresource,
1135- collected,
1136- seen,
1137- resolution_cache,
1138- scratch,
1139- refers_metaschemas,
1140- draft,
1141- ) ?;
1200+ let sub_ptr = std:: ptr:: from_ref :: < Value > ( subresource) as usize ;
1201+ if !visited. contains ( & sub_ptr) {
1202+ collect_external_resources_recursive (
1203+ & current_base,
1204+ root,
1205+ subresource,
1206+ collected,
1207+ seen,
1208+ resolution_cache,
1209+ scratch,
1210+ refers_metaschemas,
1211+ draft,
1212+ visited,
1213+ ) ?;
1214+ }
11421215 }
11431216 Ok ( ( ) )
11441217}
@@ -1147,6 +1220,25 @@ fn mark_reference(seen: &mut ReferenceTracker, base: &Arc<Uri<String>>, referenc
11471220 seen. insert ( ReferenceKey :: new ( base, reference) )
11481221}
11491222
1223+ /// Resolve an `$id` against a base URI, handling anchor-style IDs and empty fragments.
1224+ ///
1225+ /// Anchor-style `$id` values (starting with `#`) don't change the base URI.
1226+ /// Empty fragments are stripped from the resolved URI.
1227+ fn resolve_id (
1228+ base : & Arc < Uri < String > > ,
1229+ id : & str ,
1230+ resolution_cache : & mut UriCache ,
1231+ ) -> Result < Arc < Uri < String > > , Error > {
1232+ if id. starts_with ( '#' ) {
1233+ return Ok ( Arc :: clone ( base) ) ;
1234+ }
1235+ let mut resolved = ( * resolution_cache. resolve_against ( & base. borrow ( ) , id) ?) . clone ( ) ;
1236+ if resolved. fragment ( ) . is_some_and ( EStr :: is_empty) {
1237+ resolved. set_fragment ( None ) ;
1238+ }
1239+ Ok ( Arc :: new ( resolved) )
1240+ }
1241+
11501242/// Look up a value by a JSON Pointer.
11511243///
11521244/// **NOTE**: A slightly faster version of pointer resolution based on `Value::pointer` from `serde_json`.
@@ -1167,6 +1259,53 @@ pub fn pointer<'a>(document: &'a Value, pointer: &str) -> Option<&'a Value> {
11671259 )
11681260}
11691261
1262+ /// Look up a value by a JSON Pointer, tracking `$id` changes along the path.
1263+ ///
1264+ /// Returns both the resolved value and the accumulated base URI after processing
1265+ /// any `$id` declarations encountered along the path. Note that anchor-style `$id`
1266+ /// values (starting with `#`) don't change the base URI.
1267+ #[ allow( clippy:: type_complexity) ]
1268+ fn pointer_with_base < ' a > (
1269+ document : & ' a Value ,
1270+ pointer : & str ,
1271+ base : & Arc < Uri < String > > ,
1272+ resolution_cache : & mut UriCache ,
1273+ draft : Draft ,
1274+ ) -> Result < Option < ( & ' a Value , Arc < Uri < String > > ) > , Error > {
1275+ if pointer. is_empty ( ) {
1276+ return Ok ( Some ( ( document, Arc :: clone ( base) ) ) ) ;
1277+ }
1278+ if !pointer. starts_with ( '/' ) {
1279+ return Ok ( None ) ;
1280+ }
1281+
1282+ let mut current = document;
1283+ let mut current_base = Arc :: clone ( base) ;
1284+
1285+ for token in pointer. split ( '/' ) . skip ( 1 ) . map ( unescape_segment) {
1286+ // Check for $id in the current value before traversing deeper
1287+ if let Some ( id) = draft. id_of ( current) {
1288+ current_base = resolve_id ( & current_base, id, resolution_cache) ?;
1289+ }
1290+
1291+ current = match current {
1292+ Value :: Object ( map) => match map. get ( & * token) {
1293+ Some ( v) => v,
1294+ None => return Ok ( None ) ,
1295+ } ,
1296+ Value :: Array ( list) => match parse_index ( & token) . and_then ( |x| list. get ( x) ) {
1297+ Some ( v) => v,
1298+ None => return Ok ( None ) ,
1299+ } ,
1300+ _ => return Ok ( None ) ,
1301+ } ;
1302+ }
1303+
1304+ // Note: We don't check $id in the final value here because
1305+ // `collect_external_resources_recursive` will handle it
1306+ Ok ( Some ( ( current, current_base) ) )
1307+ }
1308+
11701309// Taken from `serde_json`.
11711310#[ must_use]
11721311pub fn parse_index ( s : & str ) -> Option < usize > {
0 commit comments