Skip to content

Commit 150eb7d

Browse files
authored
Add near-miss suggestions for unresolved symbol link error messages (#420)
* Add a type to compute consecutive sequence lengths * Add a type to find "near-misses" for diagnostics * Identify near misses for unresolved symbol link error messages * Update comments about adding near-misses with current status. * Silence a deprecation warning in one of the unit tests * Update error message to reflect that suggestions can be non-symbols. * Add missing variable binding in condition for Swift < 5.7 compatibility * Small scoring adjustments for best-match ranking * Add link to documentation for `CollectionDifference` enumeration order * Simplify the implementations of segment insertions and removals * Update some terminology in implementation comments * Update previous FIXME comments to describe the follow up work * Assert that insert segments aren't followed by other insert segments. Also, lower one `precondition` to `assert`. * Make micro optimization in near-miss score calculation
1 parent c9a5c5e commit 150eb7d

File tree

9 files changed

+1862
-10
lines changed

9 files changed

+1862
-10
lines changed

Sources/SwiftDocC/Infrastructure/Link Resolution/PathHierarchy.swift

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -829,8 +829,17 @@ extension PathHierarchy.Error {
829829
func errorMessage(context: DocumentationContext) -> String {
830830
switch self {
831831
case .partialResult(let partialResult, let remaining, let available):
832-
return "Reference at \(partialResult.pathWithoutDisambiguation().singleQuoted) can't resolve \(remaining.singleQuoted). Available children: \(available.joined(separator: ", "))."
833-
832+
let nearMisses = NearMiss.bestMatches(for: available, against: remaining)
833+
let suggestion: String
834+
switch nearMisses.count {
835+
case 0:
836+
suggestion = "No similar pages. Available children: \(available.joined(separator: ", "))."
837+
case 1:
838+
suggestion = "Did you mean: \(nearMisses[0])?"
839+
default:
840+
suggestion = "Did you mean one of: \(nearMisses.joined(separator: ", "))?"
841+
}
842+
return "Reference at \(partialResult.pathWithoutDisambiguation().singleQuoted) can't resolve \(remaining.singleQuoted). \(suggestion)"
834843
case .notFound, .unfindableMatch:
835844
return "No local documentation matches this reference."
836845

Sources/SwiftDocC/Semantics/MarkupReferenceResolver.swift

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ struct MarkupReferenceResolver: MarkupRewriter {
7070
return nil
7171
}
7272

73-
// FIXME: Provide near-miss suggestion here. The user is likely to make mistakes with capitalization because of character input (rdar://59660520).
73+
// FIXME: Structure the `PathHierarchyBasedLinkResolver` near-miss suggestions as fixits. https://github.com/apple/swift-docc/issues/438 (rdar://103279313)
7474
let uncuratedArticleMatch = context.uncuratedArticles[bundle.articlesDocumentationRootReference.appendingPathOfReference(unresolved)]?.source
7575
problems.append(unresolvedReferenceProblem(reference: reference, source: source, range: range, severity: severity, uncuratedArticleMatch: uncuratedArticleMatch, underlyingErrorMessage: errorMessage))
7676
return nil

Sources/SwiftDocC/Semantics/ReferenceResolver.swift

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,7 @@ struct ReferenceResolver: SemanticVisitor {
8484
return .success(resolved)
8585

8686
case let .failure(unresolved, errorMessage):
87-
// FIXME: Provide near-miss suggestion here. The user is likely to make mistakes with capitalization because of character input.
87+
// FIXME: Structure the `PathHierarchyBasedLinkResolver` near-miss suggestions as fixits. https://github.com/apple/swift-docc/issues/438 (rdar://103279313)
8888
let uncuratedArticleMatch = context.uncuratedArticles[bundle.documentationRootReference.appendingPathOfReference(unresolved)]?.source
8989
problems.append(unresolvedReferenceProblem(reference: reference, source: source, range: range, severity: severity, uncuratedArticleMatch: uncuratedArticleMatch, underlyingErrorMessage: errorMessage))
9090
return .failure(unresolved, errorMessage: errorMessage)
Lines changed: 214 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,214 @@
1+
/*
2+
This source file is part of the Swift.org open source project
3+
4+
Copyright (c) 2022 Apple Inc. and the Swift project authors
5+
Licensed under Apache License v2.0 with Runtime Library Exception
6+
7+
See https://swift.org/LICENSE.txt for license information
8+
See https://swift.org/CONTRIBUTORS.txt for Swift project authors
9+
*/
10+
11+
import Foundation
12+
13+
/// A collection of sparse segments that describe the subsequences that are common or different between two collections.
14+
struct CollectionChanges {
15+
/// The segments of common elements, removed elements, and inserted elements.
16+
let segments: [Segment]
17+
18+
/// A single segment that describe a number of elements that are either common between both collections, or that are removed or inserted in the second collection.
19+
struct Segment: Equatable {
20+
var kind: Kind
21+
var count: Int
22+
23+
enum Kind: Equatable {
24+
/// These elements are common between both collections.
25+
case common
26+
/// These elements are removed from the first collection to produce the second collection.
27+
case remove
28+
/// These elements are inserted in the first collection to produce the second collection.
29+
case insert
30+
}
31+
}
32+
33+
/// Creates a new collection changes value from the differences between to collections.
34+
///
35+
/// - Parameters:
36+
/// - from: The collection that the base is compared to.
37+
/// - to: The base collection.
38+
/// - areEquivalent: A closure that returns a Boolean value indicating whether two elements are equivalent.
39+
init<C>(from: C, to: C, by areEquivalent: (C.Element, C.Element) -> Bool = (==)) where C: BidirectionalCollection, C.Element: Hashable {
40+
guard !from.isEmpty else {
41+
segments = [.init(kind: .insert, count: to.count)]
42+
return
43+
}
44+
guard !to.isEmpty else {
45+
segments = [.init(kind: .remove, count: from.count)]
46+
return
47+
}
48+
49+
var changes = ChangeSegmentBuilder(originalCount: from.count)
50+
// The `CollectionDifference` enumeration order is documented; first removals in descending order then insertions in ascending order.
51+
// https://github.com/apple/swift/blob/main/stdlib/public/core/CollectionDifference.swift#L216-L235
52+
for change in to.difference(from: from, by: areEquivalent) {
53+
switch change {
54+
case .remove(let offset, _, _):
55+
changes.remove(at: offset)
56+
case .insert(let offset, _, _):
57+
changes.insert(at: offset)
58+
}
59+
}
60+
segments = changes.segments
61+
}
62+
}
63+
64+
/// A builder that applies collection differences to construct an array of ``Segment`` values.
65+
///
66+
/// - Important:
67+
/// Removals need to be applied in reverse order. All removals need to be applied before applying any insertions. Insertions need to be applied in order.
68+
private struct ChangeSegmentBuilder {
69+
typealias Segment = CollectionChanges.Segment
70+
71+
private(set) var segments: [Segment]
72+
73+
private var insertStartIndex = 0
74+
private var insertStartOffset = 0
75+
76+
init(originalCount: Int) {
77+
self.segments = [ Segment(kind: .common, count: originalCount) ]
78+
}
79+
80+
mutating func remove(at removalIndex: Int) {
81+
// Removals are applied in reverse order. When the first removal is applied, the only segment is the 'common' count.
82+
//
83+
// Each removal can be either be at the start of the segment, middle of the segment, or end of the segment.
84+
// - After removing from the start of the segment there can be no more removals (since those indices would be in ascending order).
85+
// - After removing from the middle, the 'common' segment is split in two with a 'remove' segment in between.
86+
// Since the removal has to be at a lower index, it can only be applied to the split 'original' segment.
87+
// - After removing from the end, the 'common' segment is made shorter and a new 'remove' segment is added after it.
88+
// Since the removal has to be at a lower index, it can only be applied to the shortened 'common' segment.
89+
//
90+
// This process repeats, meaning that every removal is always applied to the first segment.
91+
let segment = segments[0]
92+
assert(segment.kind == .common && removalIndex < segment.count, """
93+
The first segment should always be a 'common' segment (was \(segment.kind)) and (0 ..< \(segment.count)) should always contain the removal index (\(removalIndex)).
94+
If it's not, then that's means that the remove operations wasn't performed in reverse order.
95+
""")
96+
97+
if removalIndex == 0 {
98+
// Removing at the start of the segment
99+
if segment.count == 1 {
100+
segments.remove(at: 0)
101+
} else {
102+
segments[0].count -= 1
103+
}
104+
105+
if segments.first?.kind == .remove {
106+
segments[0].count += 1
107+
} else {
108+
segments.insert(Segment(kind: .remove, count: 1), at: 0)
109+
}
110+
}
111+
else if removalIndex == segment.count - 1 {
112+
// Removing at end of segment
113+
segments[0].count -= 1
114+
115+
if segments.count > 1, segments[1].kind == .remove {
116+
segments[1].count += 1
117+
} else {
118+
// Insert at `endIndex` is equivalent to `append()`
119+
segments.insert(Segment(kind: .remove, count: 1), at: 1)
120+
}
121+
} else {
122+
// Removal within segment
123+
let lowerSegmentCount = removalIndex
124+
let higherSegmentCount = segment.count - lowerSegmentCount - 1 // the 1 is for the removed element
125+
126+
// Split the segment in two with a new removal segment in-between.
127+
segments[0...0] = [
128+
Segment(kind: .common, count: lowerSegmentCount),
129+
Segment(kind: .remove, count: 1),
130+
Segment(kind: .common, count: higherSegmentCount),
131+
]
132+
}
133+
}
134+
135+
private func findSegment(toInsertAt index: Int) -> (segment: Segment, startOffset: Int, segmentIndex: Int)? {
136+
// Insertions are applied in order. This means that we can start with the previous offset and index.
137+
var offset = insertStartOffset
138+
for segmentIndex in insertStartIndex ..< segments.count {
139+
let segment = segments[segmentIndex]
140+
if segment.kind == .remove {
141+
continue
142+
}
143+
144+
if index <= offset + segment.count {
145+
return (segment, offset, segmentIndex)
146+
}
147+
offset += segment.count
148+
}
149+
return nil
150+
}
151+
152+
mutating func insert(at insertIndex: Int) {
153+
guard let (segment, startOffset, segmentIndex) = findSegment(toInsertAt: insertIndex) else {
154+
assert(segments.count == 1 && segments[0].kind == .remove, """
155+
The only case when a segment can't be found in the loop is if the only segment is a 'remove' segment.
156+
This happens when all the 'common' elements are removed (meaning that the 'from' and 'to' values have nothing in common.
157+
""")
158+
159+
segments.append(Segment(kind: .insert, count: 1))
160+
return
161+
}
162+
assert(segment.kind != .remove)
163+
164+
insertStartOffset = startOffset
165+
insertStartIndex = segmentIndex
166+
167+
guard segment.kind != .insert else {
168+
segments[segmentIndex].count += 1
169+
return
170+
}
171+
assert(segment.kind == .common)
172+
173+
if insertIndex == startOffset {
174+
// Insert at start of segment
175+
segments.insert(Segment(kind: .insert, count: 1), at: segmentIndex)
176+
} else if insertIndex == startOffset + segment.count {
177+
// Insert at end of segment
178+
let insertSegmentIndex = segmentIndex + 1
179+
180+
// If this is the last segment, append a new 'insert' segment
181+
guard insertSegmentIndex < segments.count else {
182+
segments.append(Segment(kind: .insert, count: 1))
183+
return
184+
}
185+
186+
switch segments[insertSegmentIndex].kind {
187+
case .insert:
188+
assertionFailure("Inserts are processed from low to high. There shouldn't be another 'insert' segment after 'segmentIndex'.")
189+
190+
case .common:
191+
// If the next segment is a 'common' segment, insert a new 'insert' segment before it
192+
segments.insert(Segment(kind: .insert, count: 1), at: insertSegmentIndex)
193+
194+
case .remove:
195+
// If the next segment is a 'remove' segment, skip over it so that insertions are always after removals.
196+
segments.insert(Segment(kind: .insert, count: 1), at: insertSegmentIndex + 1)
197+
198+
assert(insertSegmentIndex + 2 == segments.count || segments[insertSegmentIndex + 2].kind == .common,
199+
"If there's a segment after the remove segment, that is a segment of 'common' characters.")
200+
}
201+
} else {
202+
// Insert within segment
203+
let lowerSegmentCount = insertIndex - startOffset
204+
let higherSegmentCount = segment.count - lowerSegmentCount // nothing to add
205+
206+
// Split the segment in two with a new insertion segment in-between.
207+
segments[segmentIndex...segmentIndex] = [
208+
Segment(kind: .common, count: lowerSegmentCount),
209+
Segment(kind: .insert, count: 1),
210+
Segment(kind: .common, count: higherSegmentCount),
211+
]
212+
}
213+
}
214+
}
Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,111 @@
1+
/*
2+
This source file is part of the Swift.org open source project
3+
4+
Copyright (c) 2022 Apple Inc. and the Swift project authors
5+
Licensed under Apache License v2.0 with Runtime Library Exception
6+
7+
See https://swift.org/LICENSE.txt for license information
8+
See https://swift.org/CONTRIBUTORS.txt for Swift project authors
9+
*/
10+
11+
import Foundation
12+
13+
// A type that sorts and filters a list of strings based on how "similar" they are to a given string.
14+
//
15+
// This is meant mainly for diagnostics that wan't to offer meaning full suggestions to the end-user.
16+
enum NearMiss {
17+
18+
/// Returns the "best matches" among a list of possibilities based on how "similar" they are to a given string.
19+
static func bestMatches(for possibilities: [String], against authored: String) -> [String] {
20+
// There is no single right or wrong way to score changes. This implementation is completely arbitrary.
21+
// It's chosen because the relative scores that it computes provide "best match" results that are close
22+
// to what a person would expect. See ``NearMissTests``.
23+
24+
let goodMatches = possibilities.lazy
25+
.map { (text: String) -> (text: String, score: Double) in
26+
(text, NearMiss.score(CollectionChanges(from: authored, to: text)))
27+
}
28+
.filter {
29+
// A negative score is not considered very "similar" in this implementation.
30+
0 < $0.score
31+
}
32+
.sorted(by: { lhs, rhs in
33+
if lhs.score == rhs.score {
34+
return lhs.text < rhs.text // Sort same score alphabetically
35+
}
36+
return lhs.score > rhs.score // Sort by high score
37+
})
38+
39+
// Some common prefixes result in a large number of matches. For example, many types in Swift-DocC have
40+
// a "Documentation" prefix which yields a fairly high score in this implementation. To counteract this
41+
// we additionally filter out any match with a score that's less than 25% of the highest match's score.
42+
guard let bestScore = goodMatches.first?.score else {
43+
return []
44+
}
45+
let matchThreshold = bestScore / 4
46+
47+
return goodMatches
48+
.prefix(while: { matchThreshold < $0.score })
49+
// More than 10 results are likely not helpful to the user.
50+
.prefix(10)
51+
.map { $0.text }
52+
}
53+
54+
/// Computes the "score" for a collection of change segments.
55+
private static func score(_ changes: CollectionChanges) -> Double {
56+
// Again, there is no right or wrong way to score changes and this implementation is completely arbitrary.
57+
58+
// Give the first segment a bit more weight to its contribution to the total score
59+
guard let first = changes.segments.first else { return 0 }
60+
var score = NearMiss.score(first) * 1.75
61+
62+
for segment in changes.segments.dropFirst() {
63+
score += NearMiss.score(segment)
64+
}
65+
return score
66+
}
67+
68+
/// Computes the "score" for a single collection change segments.
69+
private static func score(_ segment: CollectionChanges.Segment) -> Double {
70+
// Again, there is no right or wrong way to score changes and this implementation is completely arbitrary.
71+
72+
// This implementation is built around a few basic ideas:
73+
//
74+
// - Common segments _add_ to a change collection's score,
75+
// - Inserted and removed segments _subtract from_ a change collection's score.
76+
// - Short "common segments" occur in differences that are very different ("orange" and "lemon" both contain a "e").
77+
// - A long sequence of common elements should contribute more than an equal length sequence of different characters.
78+
// In other words; a 50% match is still "good".
79+
// - The longer a common segment is, the more "similar" to two strings are.
80+
// - A removed segment contribute more than an inserted segment (since the author had written those characters).
81+
82+
switch segment.kind {
83+
case .common:
84+
if segment.count < 3 {
85+
// 1, or 2 common characters are too few to be what a person would consider a similarity.
86+
return 0.0
87+
} else {
88+
// To produce higher contributions for longer common sequences, this implementation sums the sequence (1...length)
89+
// and adds an arbitrary constant factor.
90+
return Double((1...segment.count).sum()) + 3
91+
}
92+
93+
// Segments of removed or inserted characters contribute to the score no matter the segment length.
94+
//
95+
// The score is linear to the length with scale factors that are tweaked to provide "best match" results that are close
96+
// to what a person would expect. See ``NearMissTests``.
97+
case .insert:
98+
return -Double(segment.count) * 1.5
99+
case .remove:
100+
// Removed characters contribute more than inserted characters since they represent something that the author wrote
101+
// that is missing in this match.
102+
return -Double(segment.count) * 3.0
103+
}
104+
}
105+
}
106+
107+
private extension ClosedRange where Bound == Int {
108+
func sum() -> Int {
109+
return (lowerBound + upperBound) * count / 2
110+
}
111+
}

Tests/SwiftDocCTests/Infrastructure/SymbolGraph/SymbolGraphLoaderTests.swift

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,9 @@ class SymbolGraphLoaderTests: XCTestCase {
112112
XCTAssertEqual(moduleNameFrequency, ["Main": 1, "One": 1, "Two": 1, "Three": 1])
113113
}
114114

115+
// This test calls ``SymbolGraph.relationships`` which is deprecated.
116+
// Deprecating the test silences the deprecation warning when running the tests. It doesn't skip the test.
117+
@available(*, deprecated)
115118
func testLoadingHighNumberOfModulesConcurrently() throws {
116119
let tempURL = try createTemporaryDirectory()
117120

0 commit comments

Comments
 (0)