Skip to content

Commit e6b8152

Browse files
authored
Fix a few issues with infinite recursions if the content contains cyclic curation (#898)
* Add functions to operate on directed graphs * Update topic graph to use sequences for graph traversal * Avoid computing full paths to determine if a symbol is manually curated * Fix an infinite recursion determining the paths to a node when there's cyclic curation rdar://126974763 * Avoid computing all paths when the caller only needs the shortest path * Avoid computing all paths when the caller only needs the roots/leaves * Avoid computing all paths when the caller only needs know if a certain node is encountered * Avoid computing all paths when the caller only needs to visit each reachable node once * Rename 'pathsTo(_:options:)' to 'finitePaths(to:options:) * Fix another infinite recursion when pages are curated in cycles rdar://126974763 * Fix correctness issue where symbol has two auto curated parents * Address code review feedback: - Rename neighbors parameter to edges for DirectedGraph initializer - Rename GraphPathIterator and move it to DirectedGraph+Paths file - Add convenience properties for topic graph directed graph "views - Elaborate on breadcrumbs path documentation and implementation comments - Elaborate on graph cycle documentation with concrete examples - Fix missing edge in directed graph test data - Use preconditionFailure for topic graph node that should always exist - Add additional graph cycle tests * Explicitly exit (trap) if trying to dump a topic graph with cyclic paths
1 parent 1357a2d commit e6b8152

21 files changed

+1462
-227
lines changed

Sources/SwiftDocC/Catalog Processing/GeneratedCurationWriter.swift

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -133,7 +133,7 @@ public struct GeneratedCurationWriter {
133133
for (usr, reference) in context.documentationCache.referencesBySymbolID {
134134
// Filter out symbols that aren't in the specified sub hierarchy.
135135
if symbolLink != nil || depthLimit != nil {
136-
guard reference == curationCrawlRoot || context.pathsTo(reference).contains(where: { path in path.suffix(depthLimit ?? .max).contains(curationCrawlRoot)}) else {
136+
guard reference == curationCrawlRoot || context.finitePaths(to: reference).contains(where: { path in path.suffix(depthLimit ?? .max).contains(curationCrawlRoot)}) else {
137137
continue
138138
}
139139
}

Sources/SwiftDocC/Indexing/Navigator/NavigatorIndex.swift

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -255,8 +255,7 @@ public class NavigatorIndex {
255255
}
256256

257257
/// Indicates the page type of a given item inside the tree.
258-
/// - Note: This information is stored as UInt8 to decrease the required size to store it and make
259-
/// the comparision faster between types.
258+
/// - Note: This information is stored as `UInt8` to decrease the required size to store it and make the comparison faster between types.
260259
public enum PageType: UInt8 {
261260
case root = 0
262261
case article = 1

Sources/SwiftDocC/Indexing/Navigator/NavigatorTree.swift

Lines changed: 42 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -480,20 +480,54 @@ public class NavigatorTree {
480480

481481
/// Copy the current node and children to new instances preserving the node item.
482482
public func copy() -> NavigatorTree.Node {
483+
// DocC detects cyclic curation and avoids adding it to the topic graph.
484+
// However, the navigator is based on the render node's topic sections which still has the cycle.
485+
//
486+
// Because the navigator nodes are shared instances, the cycle needs to be broken at each occurrence to correctly unroll it.
487+
// For example, consider this hierarchy:
488+
//
489+
// 1 2
490+
// │ │
491+
// ▼ ▼
492+
// 3 ────▶ 4
493+
// ▲ │
494+
// └── 5 ◀─┘
495+
//
496+
// When approaching the cycle from `1`, it unrolls as `1, 3, 4, 5` but when approaching it from `2` it unrolls as `2, 4, 5, 3`.
497+
// This ensures a consistent structure for the navigation tree, no matter which render node was indexed first.
498+
//
499+
// Alternatively we could process the entire graph and break the cycle based on which node has the shortest path from the root.
500+
// However, because DocC already warns about the cyclic curation, it seem sufficient to create a consistent navigation tree,
501+
// even if it's not as small as it could be if we were using a more sophisticated graph algorithm.
502+
483503
return _copy([])
484504
}
485505

486-
/// Private version of the logic to copy the current node and children to new instances preserving the node item.
487-
/// - Parameter hierarchy: The set containing the parent items to avoid entering in an infinite loop.
488-
private func _copy(_ hierarchy: Set<NavigatorItem>) -> NavigatorTree.Node {
506+
/// The private copy implementation which tracks the already encountered items to avoid an infinite recursion.
507+
private func _copy(_ seen: Set<NavigatorItem>) -> NavigatorTree.Node {
489508
let mirror = NavigatorTree.Node(item: item, bundleIdentifier: bundleIdentifier)
490-
guard !hierarchy.contains(item) else { return mirror } // Avoid to enter in an infity loop.
491-
var updatedHierarchy = hierarchy
492-
if let parentItem = parent?.item { updatedHierarchy.insert(parentItem) }
493-
children.forEach { (child) in
494-
let childCopy = child._copy(updatedHierarchy)
509+
guard !seen.contains(item) else { return mirror } // Avoid an infinite recursion.
510+
var seen = seen
511+
seen.insert(item)
512+
513+
// Avoid repeating children that have already occurred in this path through the navigation hierarchy.
514+
let childrenNotAlreadyEncountered = children.filter { !seen.contains($0.item) }
515+
for (child, indexAfter) in zip(childrenNotAlreadyEncountered, 1...) {
516+
// If the child is a group marker, ensure that the group is not empty by checking that it's followed by a non-group.
517+
// If we didn't do this, we could end up with empty groups if all the children had already been encountered higher up in the hierarchy.
518+
if child.item.pageType == NavigatorIndex.PageType.groupMarker.rawValue {
519+
guard indexAfter < childrenNotAlreadyEncountered.endIndex,
520+
childrenNotAlreadyEncountered[indexAfter].item.pageType != NavigatorIndex.PageType.groupMarker.rawValue
521+
else {
522+
// This group is empty. Skip copying it.
523+
continue
524+
}
525+
// This group is not empty. Include a copy of it
526+
}
527+
let childCopy = child._copy(seen)
495528
mirror.add(child: childCopy)
496529
}
530+
497531
return mirror
498532
}
499533

Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
/*
2+
This source file is part of the Swift.org open source project
3+
4+
Copyright (c) 2024 Apple Inc. and the Swift project authors
5+
Licensed under Apache License v2.0 with Runtime Library Exception
6+
7+
See https://swift.org/LICENSE.txt for license information
8+
See https://swift.org/CONTRIBUTORS.txt for Swift project authors
9+
*/
10+
11+
extension DocumentationContext {
12+
/// Options that configure how the context produces node breadcrumbs.
13+
struct PathOptions: OptionSet {
14+
let rawValue: Int
15+
16+
/// Prefer a technology as the canonical path over a shorter path.
17+
static let preferTechnologyRoot = PathOptions(rawValue: 1 << 0)
18+
}
19+
20+
/// Finds all finite (acyclic) paths, also called "breadcrumbs", to the given reference in the topic graph.
21+
///
22+
/// Each path is a list of references that describe a walk through the topic graph from a leaf node up to, but not including, the given `reference`.
23+
///
24+
/// The first path is the canonical path to the node. The other paths are sorted by increasing length (number of components).
25+
///
26+
/// > Note:
27+
/// If all paths from the given reference are infinite (cycle back on themselves) then this function will return an empty list, because there are no _finite_ paths in the topic graph from that reference.
28+
///
29+
/// - Parameters:
30+
/// - reference: The reference to find paths to.
31+
/// - options: Options for how the context produces node breadcrumbs.
32+
/// - Returns: A list of finite paths to the given reference in the topic graph.
33+
func finitePaths(to reference: ResolvedTopicReference, options: PathOptions = []) -> [[ResolvedTopicReference]] {
34+
topicGraph.reverseEdgesGraph
35+
.allFinitePaths(from: reference)
36+
// Graph traversal typically happens from the starting point outwards, but the callers of `finitePaths(to:options:)`
37+
// expect paths going inwards from the leaves to the starting point, excluding the starting point itself.
38+
// To match the caller's expectations we remove the starting point and then flip the paths.
39+
.map { $0.dropFirst().reversed() }
40+
.sorted { (lhs, rhs) -> Bool in
41+
// Order a path rooted in a technology as the canonical one.
42+
if options.contains(.preferTechnologyRoot), let first = lhs.first {
43+
return try! entity(with: first).semantic is Technology
44+
}
45+
46+
return breadcrumbsAreInIncreasingOrder(lhs, rhs)
47+
}
48+
}
49+
50+
/// Finds the shortest finite (acyclic) path, also called "breadcrumb", to the given reference in the topic graph.
51+
///
52+
/// The path is a list of references that describe a walk through the topic graph from a leaf node up to, but not including, the given `reference`.
53+
///
54+
/// > Note:
55+
/// If all paths from the given reference are infinite (cycle back on themselves) then this function will return `nil`, because there are no _finite_ paths in the topic graph from that reference.
56+
///
57+
/// - Parameter reference: The reference to find the shortest path to.
58+
/// - Returns: The shortest path to the given reference, or `nil` if all paths to the reference are infinite (contain cycles).
59+
func shortestFinitePath(to reference: ResolvedTopicReference) -> [ResolvedTopicReference]? {
60+
topicGraph.reverseEdgesGraph
61+
.shortestFinitePaths(from: reference)
62+
// Graph traversal typically happens from the starting point outwards, but the callers of `shortestFinitePaths(to:)`
63+
// expect paths going inwards from the leaves to the starting point, excluding the starting point itself.
64+
// To match the caller's expectations we remove the starting point and then flip the paths.
65+
.map { $0.dropFirst().reversed() }
66+
.min(by: breadcrumbsAreInIncreasingOrder)
67+
}
68+
69+
/// Finds all the reachable root node references from the given reference.
70+
///
71+
/// > Note:
72+
/// If all paths from the given reference are infinite (cycle back on themselves) then this function will return an empty set, because there are no reachable roots in the topic graph from that reference.
73+
///
74+
/// - Parameter reference: The reference to find reachable root node references from.
75+
/// - Returns: The references of the root nodes that are reachable fro the given reference, or `[]` if all paths from the reference are infinite (contain cycles).
76+
func reachableRoots(from reference: ResolvedTopicReference) -> Set<ResolvedTopicReference> {
77+
topicGraph.reverseEdgesGraph.reachableLeafNodes(from: reference)
78+
}
79+
}
80+
81+
/// Compares two breadcrumbs for sorting so that the breadcrumb with fewer components come first and breadcrumbs with the same number of components are sorted alphabetically.
82+
private func breadcrumbsAreInIncreasingOrder(_ lhs: [ResolvedTopicReference], _ rhs: [ResolvedTopicReference]) -> Bool {
83+
// If the breadcrumbs have the same number of components, sort alphabetically to produce stable results.
84+
guard lhs.count != rhs.count else {
85+
return lhs.map({ $0.path }).joined(separator: ",") < rhs.map({ $0.path }).joined(separator: ",")
86+
}
87+
// Otherwise, sort by the number of breadcrumb components.
88+
return lhs.count < rhs.count
89+
}
90+

Sources/SwiftDocC/Infrastructure/DocumentationContext.swift

Lines changed: 41 additions & 78 deletions
Original file line numberDiff line numberDiff line change
@@ -2247,7 +2247,7 @@ public class DocumentationContext: DocumentationContextDataProviderDelegate {
22472247
let automaticallyCurated = autoCurateSymbolsInTopicGraph()
22482248

22492249
// Crawl the rest of the symbols that haven't been crawled so far in hierarchy pre-order.
2250-
allCuratedReferences = try crawlSymbolCuration(in: automaticallyCurated.map(\.child), bundle: bundle, initial: allCuratedReferences)
2250+
allCuratedReferences = try crawlSymbolCuration(in: automaticallyCurated.map(\.symbol), bundle: bundle, initial: allCuratedReferences)
22512251

22522252
// Remove curation paths that have been created automatically above
22532253
// but we've found manual curation for in the second crawl pass.
@@ -2306,19 +2306,18 @@ public class DocumentationContext: DocumentationContextDataProviderDelegate {
23062306
/// call `removeUnneededAutomaticCuration(_:)` which walks the list of automatic curations and removes
23072307
/// the parent <-> child topic graph relationships that have been obsoleted.
23082308
///
2309-
/// - Parameter automaticallyCurated: A list of topics that have been automatically curated.
2310-
func removeUnneededAutomaticCuration(_ automaticallyCurated: [(child: ResolvedTopicReference, parent: ResolvedTopicReference)]) {
2311-
for pair in automaticallyCurated {
2312-
let paths = pathsTo(pair.child)
2313-
2314-
// Collect all current unique parents of the child.
2315-
let parents = Set(paths.map({ $0.last?.path }))
2316-
2317-
// Check if the topic has multiple curation paths
2318-
guard parents.count > 1 else { continue }
2319-
2320-
// The topic has been manually curated, remove the automatic curation now.
2321-
topicGraph.removeEdge(fromReference: pair.parent, toReference: pair.child)
2309+
/// - Parameter automaticallyCurated: A list of automatic curation records.
2310+
func removeUnneededAutomaticCuration(_ automaticallyCurated: [AutoCuratedSymbolRecord]) {
2311+
// It might look like it would be correct to check `topicGraph.nodes[symbol]?.isManuallyCurated` here,
2312+
// but that would incorrectly remove the only parent if the manual curation and the automatic curation was the same.
2313+
//
2314+
// Similarly, it might look like it would be correct to only check `parents(of: symbol).count > 1` here,
2315+
// but that would incorrectly remove the automatic curation for symbols with different language representations with different parents.
2316+
for (symbol, parent, counterpartParent) in automaticallyCurated where parents(of: symbol).count > (counterpartParent != nil ? 2 : 1) {
2317+
topicGraph.removeEdge(fromReference: parent, toReference: symbol)
2318+
if let counterpartParent {
2319+
topicGraph.removeEdge(fromReference: counterpartParent, toReference: symbol)
2320+
}
23222321
}
23232322
}
23242323

@@ -2359,20 +2358,39 @@ public class DocumentationContext: DocumentationContextDataProviderDelegate {
23592358
}
23602359
}
23612360

2361+
typealias AutoCuratedSymbolRecord = (symbol: ResolvedTopicReference, parent: ResolvedTopicReference, counterpartParent: ResolvedTopicReference?)
2362+
23622363
/// Curate all remaining uncurated symbols under their natural parent from the symbol graph.
23632364
///
23642365
/// This will include all symbols that were not manually curated by the documentation author.
23652366
/// - Returns: An ordered list of symbol references that have been added to the topic graph automatically.
2366-
private func autoCurateSymbolsInTopicGraph() -> [(child: ResolvedTopicReference, parent: ResolvedTopicReference)] {
2367-
var automaticallyCuratedSymbols = [(ResolvedTopicReference, ResolvedTopicReference)]()
2368-
linkResolver.localResolver.traverseSymbolAndParentPairs { reference, parentReference in
2367+
private func autoCurateSymbolsInTopicGraph() -> [AutoCuratedSymbolRecord] {
2368+
var automaticallyCuratedSymbols = [AutoCuratedSymbolRecord]()
2369+
linkResolver.localResolver.traverseSymbolAndParents { reference, parentReference, counterpartParentReference in
23692370
guard let topicGraphNode = topicGraph.nodeWithReference(reference),
2370-
let topicGraphParentNode = topicGraph.nodeWithReference(parentReference),
2371-
// Check that the node hasn't got any parents from manual curation
2371+
// Check that the node isn't already manually curated
23722372
!topicGraphNode.isManuallyCurated
23732373
else { return }
2374+
2375+
// Check that the symbol doesn't already have parent's that aren't either language representation's hierarchical parent.
2376+
// This for example happens for default implementation and symbols that are requirements of protocol conformances.
2377+
guard parents(of: reference).allSatisfy({ $0 == parentReference || $0 == counterpartParentReference }) else {
2378+
return
2379+
}
2380+
2381+
guard let topicGraphParentNode = topicGraph.nodeWithReference(parentReference) else {
2382+
preconditionFailure("Node with reference \(parentReference.absoluteString) exist in link resolver but not in topic graph.")
2383+
}
23742384
topicGraph.addEdge(from: topicGraphParentNode, to: topicGraphNode)
2375-
automaticallyCuratedSymbols.append((child: reference, parent: parentReference))
2385+
2386+
if let counterpartParentReference {
2387+
guard let topicGraphCounterpartParentNode = topicGraph.nodeWithReference(counterpartParentReference) else {
2388+
preconditionFailure("Node with reference \(counterpartParentReference.absoluteString) exist in link resolver but not in topic graph.")
2389+
}
2390+
topicGraph.addEdge(from: topicGraphCounterpartParentNode, to: topicGraphNode)
2391+
}
2392+
// Collect a single automatic curation record for both language representation parents.
2393+
automaticallyCuratedSymbols.append((reference, parentReference, counterpartParentReference))
23762394
}
23772395
return automaticallyCuratedSymbols
23782396
}
@@ -2736,15 +2754,9 @@ public class DocumentationContext: DocumentationContextDataProviderDelegate {
27362754
return topicGraph.nodes[reference]?.title
27372755
}
27382756

2739-
/**
2740-
Traverse the Topic Graph breadth-first, starting at the given reference.
2741-
*/
2742-
func traverseBreadthFirst(from reference: ResolvedTopicReference, _ observe: (TopicGraph.Node) -> TopicGraph.Traversal) {
2743-
guard let node = topicGraph.nodeWithReference(reference) else {
2744-
return
2745-
}
2746-
2747-
topicGraph.traverseBreadthFirst(from: node, observe)
2757+
/// Returns a sequence that traverses the topic graph in breadth first order from a given reference, without visiting the same node more than once.
2758+
func breadthFirstSearch(from reference: ResolvedTopicReference) -> some Sequence<TopicGraph.Node> {
2759+
topicGraph.breadthFirstSearch(from: reference)
27482760
}
27492761

27502762
/**
@@ -2866,55 +2878,6 @@ public class DocumentationContext: DocumentationContextDataProviderDelegate {
28662878
.map { $0.reference }
28672879
}
28682880

2869-
/// Options to consider when producing node breadcrumbs.
2870-
struct PathOptions: OptionSet {
2871-
let rawValue: Int
2872-
2873-
/// The node is a technology page; sort the path to a technology as canonical.
2874-
static let preferTechnologyRoot = PathOptions(rawValue: 1 << 0)
2875-
}
2876-
2877-
/// Finds all paths (breadcrumbs) to the given node reference.
2878-
///
2879-
/// Each path is an array of references to the symbols from the module symbol to the current one.
2880-
/// The first path in the array is always the canonical path to the symbol.
2881-
///
2882-
/// - Parameters:
2883-
/// - reference: The reference to build that paths to.
2884-
/// - currentPathToNode: Used for recursion - an accumulated path to "continue" working on.
2885-
/// - Returns: A list of paths to the current reference in the topic graph.
2886-
func pathsTo(_ reference: ResolvedTopicReference, currentPathToNode: [ResolvedTopicReference] = [], options: PathOptions = []) -> [[ResolvedTopicReference]] {
2887-
let nodeParents = parents(of: reference)
2888-
guard !nodeParents.isEmpty else {
2889-
// The path ends with this node
2890-
return [currentPathToNode]
2891-
}
2892-
var results = [[ResolvedTopicReference]]()
2893-
for parentReference in nodeParents {
2894-
let parentPaths = pathsTo(parentReference, currentPathToNode: [parentReference] + currentPathToNode)
2895-
results.append(contentsOf: parentPaths)
2896-
}
2897-
2898-
// We are sorting the breadcrumbs by the path distance to the documentation root
2899-
// so that the first element is the shortest path that we are using as canonical.
2900-
results.sort { (lhs, rhs) -> Bool in
2901-
// Order a path rooted in a technology as the canonical one.
2902-
if options.contains(.preferTechnologyRoot), let first = lhs.first {
2903-
return try! entity(with: first).semantic is Technology
2904-
}
2905-
2906-
// If the breadcrumbs have equal amount of components
2907-
// sort alphabetically to produce stable paths order.
2908-
guard lhs.count != rhs.count else {
2909-
return lhs.map({ $0.path }).joined(separator: ",") < rhs.map({ $0.path }).joined(separator: ",")
2910-
}
2911-
// Order by the length of the breadcrumb.
2912-
return lhs.count < rhs.count
2913-
}
2914-
2915-
return results
2916-
}
2917-
29182881
func dumpGraph() -> String {
29192882
return topicGraph.nodes.values
29202883
.filter { parents(of: $0.reference).isEmpty }

0 commit comments

Comments
 (0)