|
| 1 | +// |
| 2 | +// FuzzySearch.swift |
| 3 | +// CodeEdit |
| 4 | +// |
| 5 | +// Created by Tommy Ludwig on 04.05.23. |
| 6 | +// |
| 7 | + |
| 8 | +import Foundation |
| 9 | + |
/// A namespace for fuzzy file-name search over a list of file URLs.
enum FuzzySearch {
    /// Searches an array of file URLs for matches of a fuzzy search query.
    ///
    /// Every URL is scored exactly once with `score(query:url:)`. URLs whose score is not greater than `0.0`
    /// are dropped, and the remaining URLs are returned sorted by score in descending order.
    ///
    /// - Parameters:
    ///   - query: A `String` value representing the fuzzy search query.
    ///   - urls: An array of `URL`s, each representing a file, to search within.
    /// - Returns: An array of `URL`s that match the fuzzy search query, sorted by score (best match first).
    static func search(query: String, in urls: [URL]) -> [URL] {
        // Score each URL a single time: scoring touches the file system and depends on the current time,
        // so re-evaluating it inside the sort comparator is both slow and potentially inconsistent.
        let scoredURLs = urls.map { url in
            (url: url, score: score(query: query, url: url))
        }

        return scoredURLs
            .filter { $0.score > 0.0 }
            .sorted { $0.score > $1.score }
            .map { $0.url }
    }

    /// Calculates the score of the fuzzy search query against the file name of a URL.
    ///
    /// The comparison is case-insensitive and only looks at `url.lastPathComponent`.
    /// The query is split on spaces into tokens. For every token that occurs in the file name, a token score
    /// (token length relative to the file name length) is multiplied by a proximity score for its occurrences
    /// and by `1 - normalizedLevenshteinDistance`. The sum is averaged over the number of tokens.
    /// If the file's modification date can be read, the result is additionally multiplied by a recency score.
    ///
    /// - Parameters:
    ///   - query: A `String` value representing the fuzzy search query.
    ///   - url: A `URL` value representing the file path to search within.
    /// - Returns: A `Double` value representing the calculated score; `0.0` if nothing matches
    ///            or the query contains no tokens.
    private static func score(query: String, url: URL) -> Double {
        let query = query.lowercased()
        let text = url.lastPathComponent.lowercased()
        let queryTokens = query.split(separator: " ")

        // An empty (or whitespace-only) query would otherwise divide by zero below and produce NaN.
        guard !queryTokens.isEmpty, !text.isEmpty else {
            return 0.0
        }

        let textLength = Double(text.count)
        var totalScore: Double = 0.0

        for token in queryTokens {
            let ranges = text.ranges(of: token)
            guard !ranges.isEmpty else {
                continue
            }

            let tokenScore = Double(token.count) / textLength
            let proximityScore = proximityScoreForRanges(ranges, text: text)
            let levenshteinScore = Double(levenshteinDistance(from: String(token), to: text)) / textLength
            totalScore += (tokenScore * proximityScore) * (1 - levenshteinScore)
        }

        let averageScore = totalScore / Double(queryTokens.count)

        // The date score is a pure multiplier, so a non-matching file stays at zero:
        // skip the file system lookup in that case.
        guard averageScore > 0.0 else {
            return 0.0
        }

        guard let date = getLastModifiedDate(for: url.path) else {
            return averageScore
        }
        return averageScore * calculateDateScore(for: date)
    }

    /// Calculates the proximity score based on an array of ranges.
    ///
    /// The score starts at `1.0`, and every pair of neighbouring ranges contributes `1 / gap`, where the gap is
    /// measured in UTF-16 code units between the end of one range and the start of the next.
    /// The sum is divided by the number of ranges. Directly adjacent ranges (gap of zero) are treated as a gap
    /// of one, so the result is always finite.
    ///
    /// - Parameters:
    ///   - ranges: An array of `Range<String.Index>` objects representing the positions of matched substrings.
    ///   - text: The string the ranges refer to; used to convert indices into offsets.
    /// - Returns: A `Double` value representing the proximity score, or `0.0` when `ranges` is empty.
    private static func proximityScoreForRanges(_ ranges: [Range<String.Index>], text: String) -> Double {
        // `1..<0` is not a valid range, so an empty input must be handled before iterating.
        guard !ranges.isEmpty else {
            return 0.0
        }

        let sortedRanges = ranges.sorted { $0.lowerBound < $1.lowerBound }
        var score: Double = 1.0

        for (previousRange, currentRange) in zip(sortedRanges, sortedRanges.dropFirst()) {
            let distance = currentRange.lowerBound.utf16Offset(in: text)
                - previousRange.upperBound.utf16Offset(in: text)
            // Clamp to 1 so that back-to-back matches do not divide by zero and yield an infinite score.
            score += 1.0 / Double(max(distance, 1))
        }

        return score / Double(sortedRanges.count)
    }

    /// Retrieve the last modification date for a given file path.
    ///
    /// This function asks `FileManager.default` for the attributes of the item at `filePath`
    /// and returns its `.modificationDate` attribute.
    ///
    /// - Parameter filePath: The file path for which to retrieve the last modification date.
    /// - Returns: The last modification date, or `nil` if the attributes cannot be read
    ///            or do not contain a modification date.
    private static func getLastModifiedDate(for filePath: String) -> Date? {
        // A missing or unreadable file is an expected situation here; the caller falls back to the
        // plain text score, so the error is intentionally mapped to `nil`.
        let attributes = try? FileManager.default.attributesOfItem(atPath: filePath)
        return attributes?[.modificationDate] as? Date
    }

    /// Calculate the date score for a given file's modification date.
    ///
    /// This function calculates the date score based on the time difference
    /// between the current date and the file's modification date,
    /// using an exponential decay function with a half-life of 3600 seconds (1 hour).
    /// A constant `0.01` is added so that old files keep a small, non-zero multiplier.
    /// Modification dates in the future are treated as "modified just now".
    ///
    /// - Parameter modificationDate: The file's modification date.
    /// - Returns: The date score as a `Double` value in the range `0.01...1.01`.
    private static func calculateDateScore(for modificationDate: Date) -> Double {
        // Clamp to zero: a negative age would make the exponential grow without bound.
        let age = max(0, Date().timeIntervalSince(modificationDate))
        let halfLife: Double = 3600 // decay half-life in seconds
        let decayFactor = log(2) / halfLife
        return exp(-decayFactor * age) + 0.01
    }

    /// Calculates the Levenshtein distance between two input strings.
    ///
    /// The distance is computed over `Character`s using two rolling rows instead of a full matrix.
    ///
    /// - Parameters:
    ///   - sourceString: The source string to compare against the target string;
    ///   - targetString: The target string to compare against the source string.
    /// - Returns: The Levenshtein distance between `sourceString` and `targetString`.
    ///            An integer representing the minimum number of
    ///            insertions, deletions, or substitutions required to transform the source string
    ///            into the target string.
    private static func levenshteinDistance(from sourceString: String, to targetString: String) -> Int {
        let source = Array(sourceString)
        let target = Array(targetString)

        guard !source.isEmpty else {
            return target.count
        }

        guard !target.isEmpty else {
            return source.count
        }

        // `previousRow[j]` holds the distance between the processed prefix of `source` and `target.prefix(j)`.
        var previousRow = Array(0...target.count)
        var currentRow = [Int](repeating: 0, count: target.count + 1)

        for (rowIndex, sourceCharacter) in source.enumerated() {
            currentRow[0] = rowIndex + 1

            for (columnIndex, targetCharacter) in target.enumerated() {
                let cost = sourceCharacter == targetCharacter ? 0 : 1

                let deletionCost = previousRow[columnIndex + 1] + 1
                let insertionCost = currentRow[columnIndex] + 1
                let substitutionCost = previousRow[columnIndex] + cost

                currentRow[columnIndex + 1] = min(deletionCost, insertionCost, substitutionCost)
            }

            swap(&previousRow, &currentRow)
        }

        return previousRow[target.count]
    }
}
| 182 | + |
extension String {
    /// Finds every non-overlapping, case-insensitive occurrence of `searchString` in the receiver.
    ///
    /// The search walks the string from front to back: after each match, the search continues
    /// with the part of the string that follows the match.
    ///
    /// - Parameter searchString: A `String` value to search for within the original string.
    /// - Returns: An array of `Range<String.Index>` objects, in order of appearance, representing the
    ///            positions of all occurrences of `searchString`. The array is empty if there is no match.
    func ranges(of searchString: String) -> [Range<String.Index>] {
        var matches: [Range<String.Index>] = []
        // The slice shares its indices with `self`, so matched ranges are valid for the whole string.
        var remainder = self[startIndex..<endIndex]

        while let match = remainder.range(of: searchString, options: .caseInsensitive) {
            matches.append(match)
            remainder = self[match.upperBound..<endIndex]
        }

        return matches
    }
}
0 commit comments