Skip to content

Commit 97ffe42

Browse files
committed
Add inline documentation for Levenshtein algorithm
1 parent 0f7b136 commit 97ffe42

File tree

1 file changed

+34
-3
lines changed

1 file changed

+34
-3
lines changed

Sources/DynamicJSON/Utilities.swift

Lines changed: 34 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,17 +24,21 @@ extension String {
2424
///
2525
/// Returns: A normalized version of the string for consistent lookup.
2626
func normalizedKey() -> String {
27-
// Replace hyphens and spaces with underscores for consistent splitting
27+
// Convert hyphens and spaces to underscores for a unified delimiter
2828
let sanitized = self
2929
.replacingOccurrences(of: "-", with: "_")
3030
.replacingOccurrences(of: " ", with: "_")
3131

32+
// Pattern splits between lowercase/digit followed by uppercase, or on existing underscores
3233
let pattern = #"(?<=[a-z0-9])(?=[A-Z])|_"#
34+
// Compile the regular expression
3335
let regex = try! NSRegularExpression(pattern: pattern, options: [])
3436

3537
let range = NSRange(sanitized.startIndex..<sanitized.endIndex, in: sanitized)
38+
// Insert underscores where pattern matches (e.g., camelCase -> camel_Case)
3639
let spaced = regex.stringByReplacingMatches(in: sanitized, options: [], range: range, withTemplate: "_")
3740

41+
// Normalize by lowercasing and rejoining with single underscores
3842
return spaced
3943
.split(separator: "_")
4044
.map { $0.lowercased() }
@@ -51,33 +55,51 @@ extension String {
5155
/// - Parameter target: The string to compare against.
5256
/// - Returns: The number of edits needed to match the target.
5357
func levenshteinDistance(to target: String) -> Int {
    // Work on character arrays so individual positions can be indexed directly.
    let sourceChars = Array(self)
    let targetChars = Array(target)

    // Turning an empty string into the other one costs one edit per character.
    if sourceChars.isEmpty { return targetChars.count }
    if targetChars.isEmpty { return sourceChars.count }

    // Only two rows of the edit-distance table are kept at any time.
    // `previousRow[j]` starts out as the cost of building the first `j`
    // target characters from an empty source prefix.
    var previousRow = Array(0...targetChars.count)
    var currentRow = [Int](repeating: 0, count: targetChars.count + 1)

    for (row, sourceChar) in sourceChars.enumerated() {
        // Reducing the first `row + 1` source characters to an empty target
        // prefix takes `row + 1` deletions.
        currentRow[0] = row + 1

        for (column, targetChar) in targetChars.enumerated() {
            // Matching characters can be carried over for free.
            let substitutionCost = sourceChar == targetChar ? 0 : 1

            let insertion = currentRow[column] + 1
            let deletion = previousRow[column + 1] + 1
            let substitution = previousRow[column] + substitutionCost

            // `Swift.min` is spelled out so the free function is used rather
            // than the collection's own `min()` method.
            currentRow[column + 1] = Swift.min(insertion, deletion, substitution)
        }

        // The row just filled in becomes the reference row for the next
        // source character; the old reference row is reused as scratch space.
        swap(&previousRow, &currentRow)
    }

    // After the final swap the completed row lives in `previousRow`.
    return previousRow[targetChars.count]
}
7898
}
7999

80100
extension Dictionary where Key == String, Value == DynamicJSON {
101+
102+
/// Note: The Levenshtein distance measures how different two strings are: it is the minimum number of single-character edits (insertions, deletions, or substitutions) required to transform one string into the other.
81103
/// Attempts to find the best-matching key in the dictionary for a given lookup key.
82104
///
83105
/// Matching is performed in the following order:
@@ -90,34 +112,43 @@ extension Dictionary where Key == String, Value == DynamicJSON {
90112
/// - logMatch: A closure used to report fallback key matches for debugging.
91113
/// - Returns: The best matching `DynamicJSON` value, or `nil` if no reasonable match found.
92114
func fuzzyMatch(for key: String, logMatch: (_ original: String, _ matched: String) -> Void) -> DynamicJSON? {
    // Normalize the lookup key once; both fallback strategies compare against this form.
    let normalized = key.normalizedKey()

    // Step 1: partial match — take the first stored key that contains the normalized lookup key.
    // Dictionary iteration order is unspecified, so when several stored keys qualify,
    // which one is returned is arbitrary.
    if let partial = first(where: { $0.key.contains(normalized) }) {
        logMatch(key, partial.key)
        return partial.value
    }

    // Step 2: fuzzy match — pick the stored key with the smallest Levenshtein distance
    // to the normalized lookup key, accepting it only if it is at most `maxDistance` edits away.
    let maxDistance = 2
    var bestMatch: (key: String, value: DynamicJSON)?
    var bestDistance = Int.max

    for (storedKey, value) in self {
        let distance = normalized.levenshteinDistance(to: storedKey)
        // Strictly-smaller comparison: on ties, the candidate encountered first is kept.
        if distance <= maxDistance && distance < bestDistance {
            bestDistance = distance
            bestMatch = (key: storedKey, value: value)
        }
    }

    // No stored key was close enough.
    guard let match = bestMatch else {
        return nil
    }

    // Report the caller's original key alongside the key that was actually matched.
    // The binding is deliberately not named `key`: shadowing the parameter here would
    // make the log report the matched key as both the original and the match.
    logMatch(key, match.key)
    return match.value
}
123154
}

0 commit comments

Comments
 (0)