From 37ddd3637b9ad7ae61b08545bb133f2ab1e32bc8 Mon Sep 17 00:00:00 2001 From: Anya Mallon Date: Wed, 5 Feb 2025 12:32:31 +0100 Subject: [PATCH 1/2] BookmarkCoreDataImporter updated to support BookmarksImportSummary --- Package.swift | 3 ++- .../BookmarkCoreDataImporter.swift | 25 ++++++++++++++----- 2 files changed, 21 insertions(+), 7 deletions(-) diff --git a/Package.swift b/Package.swift index a06efdbd5..29d0c2da0 100644 --- a/Package.swift +++ b/Package.swift @@ -114,8 +114,9 @@ let package = Package( .target( name: "Bookmarks", dependencies: [ + "BrowserServicesKit", "Persistence", - "Common", + "Common" ], resources: [ .process("BookmarksModel.xcdatamodeld") diff --git a/Sources/Bookmarks/ImportExport/BookmarkCoreDataImporter.swift b/Sources/Bookmarks/ImportExport/BookmarkCoreDataImporter.swift index 6c2dd512b..36664ef1c 100644 --- a/Sources/Bookmarks/ImportExport/BookmarkCoreDataImporter.swift +++ b/Sources/Bookmarks/ImportExport/BookmarkCoreDataImporter.swift @@ -19,6 +19,7 @@ import Foundation import CoreData import Persistence +import BrowserServicesKit public class BookmarkCoreDataImporter { @@ -30,9 +31,9 @@ public class BookmarkCoreDataImporter { self.favoritesDisplayMode = favoritesDisplayMode } - public func importBookmarks(_ bookmarks: [BookmarkOrFolder]) async throws { + public func importBookmarks(_ bookmarks: [BookmarkOrFolder]) async throws -> BookmarksImportSummary { - try await withCheckedThrowingContinuation { (continuation: CheckedContinuation) in + return try await withCheckedThrowingContinuation { (continuation: CheckedContinuation) in context.performAndWait { () in do { @@ -43,13 +44,15 @@ public class BookmarkCoreDataImporter { } var bookmarkURLToIDMap = try bookmarkURLToID(in: context) + var summary = BookmarksImportSummary(successful: 0, duplicates: 0, failed: 0) try recursivelyCreateEntities(from: bookmarks, parent: topLevelBookmarksFolder, favoritesFolders: favoritesFolders, - bookmarkURLToIDMap: &bookmarkURLToIDMap) + bookmarkURLToIDMap: &bookmarkURLToIDMap, + summary: &summary) try context.save() - continuation.resume() + continuation.resume(returning: summary) } catch { continuation.resume(throwing: error) } @@ -90,9 +93,11 @@ public class BookmarkCoreDataImporter { private func recursivelyCreateEntities(from bookmarks: [BookmarkOrFolder], parent: BookmarkEntity, favoritesFolders: [BookmarkEntity], - bookmarkURLToIDMap: inout [String: NSManagedObjectID]) throws { + bookmarkURLToIDMap: inout [String: NSManagedObjectID], + summary: inout BookmarksImportSummary) throws { for bookmarkOrFolder in bookmarks { if bookmarkOrFolder.isInvalidBookmark { + summary.failed += 1 continue } @@ -105,7 +110,8 @@ public class BookmarkCoreDataImporter { try recursivelyCreateEntities(from: children, parent: folder, favoritesFolders: favoritesFolders, - bookmarkURLToIDMap: &bookmarkURLToIDMap) + bookmarkURLToIDMap: &bookmarkURLToIDMap, + summary: &summary) } case .favorite: if let url = bookmarkOrFolder.url { @@ -120,11 +126,15 @@ public class BookmarkCoreDataImporter { newFavorite.addToFavorites(folders: favoritesFolders) bookmarkURLToIDMap[url.absoluteString] = newFavorite.objectID } + summary.successful += 1 + } else { + summary.failed += 1 } case .bookmark: if let url = bookmarkOrFolder.url { if parent.isRoot, parent.childrenArray.first(where: { $0.urlObject == url }) != nil { + summary.successful += 1 continue } else { let newBookmark = BookmarkEntity.makeBookmark(title: bookmarkOrFolder.name, @@ -133,6 +143,9 @@ public class BookmarkCoreDataImporter { context: context) bookmarkURLToIDMap[url.absoluteString] = newBookmark.objectID } + summary.successful += 1 + } else { + summary.failed += 1 } } } From 3f9f0941cdb9290c85ea10e36eab3107d3ff76c0 Mon Sep 17 00:00:00 2001 From: Anya Mallon Date: Mon, 17 Feb 2025 13:04:40 +0100 Subject: [PATCH 2/2] CSVImporter updated with de-duplication based on eTLD+1 --- .../Matchers/AutofillUrlMatcher.swift | 7 + .../DataImport/Logins/CSV/CSVImporter.swift | 151 ++++++++++++++++-- .../DataImport/Logins/LoginImport.swift | 4 +- .../SecureVault/SecureVaultModels.swift | 9 +- 4 files changed, 148 insertions(+), 23 deletions(-) diff --git a/Sources/BrowserServicesKit/Autofill/Matchers/AutofillUrlMatcher.swift b/Sources/BrowserServicesKit/Autofill/Matchers/AutofillUrlMatcher.swift index 33c9479de..f1f6d3514 100644 --- a/Sources/BrowserServicesKit/Autofill/Matchers/AutofillUrlMatcher.swift +++ b/Sources/BrowserServicesKit/Autofill/Matchers/AutofillUrlMatcher.swift @@ -73,4 +73,11 @@ public struct AutofillDomainNameUrlMatcher: AutofillUrlMatcher { return URLComponents(string: "\(URL.URLProtocol.https.scheme)\(noScheme)") } + public func extractTLD(domain: String, tld: TLD) -> String? { + guard var urlComponents = normalizeSchemeForAutofill(domain) else { return nil } + guard urlComponents.host != .localhost else { return domain } + return urlComponents.eTLDplus1WithPort(tld: tld) + + } + } diff --git a/Sources/BrowserServicesKit/DataImport/Logins/CSV/CSVImporter.swift b/Sources/BrowserServicesKit/DataImport/Logins/CSV/CSVImporter.swift index 67df5a551..fa53f1725 100644 --- a/Sources/BrowserServicesKit/DataImport/Logins/CSV/CSVImporter.swift +++ b/Sources/BrowserServicesKit/DataImport/Logins/CSV/CSVImporter.swift @@ -158,32 +158,35 @@ final public class CSVImporter: DataImporter { private let loginImporter: LoginImporter private let defaultColumnPositions: ColumnPositions? private let secureVaultReporter: SecureVaultReporting + private let tld: TLD - public init(fileURL: URL?, csvContent: String? = nil, loginImporter: LoginImporter, defaultColumnPositions: ColumnPositions?, reporter: SecureVaultReporting) { + public init(fileURL: URL?, csvContent: String? = nil, loginImporter: LoginImporter, defaultColumnPositions: ColumnPositions?, reporter: SecureVaultReporting, tld: TLD) { self.fileURL = fileURL self.csvContent = csvContent self.loginImporter = loginImporter self.defaultColumnPositions = defaultColumnPositions self.secureVaultReporter = reporter + self.tld = tld } - static func totalValidLogins(in fileURL: URL, defaultColumnPositions: ColumnPositions?) -> Int { + static func totalValidLogins(in fileURL: URL, defaultColumnPositions: ColumnPositions?, tld: TLD) -> Int { guard let fileContents = try? String(contentsOf: fileURL, encoding: .utf8) else { return 0 } - let logins = extractLogins(from: fileContents, defaultColumnPositions: defaultColumnPositions) ?? [] + let logins = extractLogins(from: fileContents, defaultColumnPositions: defaultColumnPositions, tld: tld) ?? [] return logins.count } - static public func totalValidLogins(in csvContent: String, defaultColumnPositions: ColumnPositions?) -> Int { - let logins = extractLogins(from: csvContent, defaultColumnPositions: defaultColumnPositions) ?? [] - + static public func totalValidLogins(in csvContent: String, defaultColumnPositions: ColumnPositions?, tld: TLD) -> Int { + let logins = extractLogins(from: csvContent, defaultColumnPositions: defaultColumnPositions, tld: tld) ?? [] return logins.count } - public static func extractLogins(from fileContents: String, defaultColumnPositions: ColumnPositions? = nil) -> [ImportedLoginCredential]? { + public static func extractLogins(from fileContents: String, defaultColumnPositions: ColumnPositions? = nil, tld: TLD) -> [ImportedLoginCredential]? { guard let parsed = try? CSVParser().parse(string: fileContents) else { return nil } + let urlMatcher = AutofillDomainNameUrlMatcher() + let columnPositions: ColumnPositions? var startRow = 0 if let autodetected = ColumnPositions(csv: parsed) { @@ -195,7 +198,9 @@ final public class CSVImporter: DataImporter { guard parsed.indices.contains(startRow) else { return [] } // no data - let result = parsed[startRow...].compactMap(columnPositions.read) + let result = parsed[startRow...].compactMap { row in + columnPositions.read(row, tld: tld, urlMatcher: urlMatcher) + } guard !result.isEmpty else { if parsed.filter({ !$0.isEmpty }).isEmpty { @@ -205,7 +210,7 @@ final public class CSVImporter: DataImporter { } } - return result + return result.removeDuplicates() } public var importableTypes: [DataImport.DataType] { @@ -245,7 +250,7 @@ final public class CSVImporter: DataImporter { do { try updateProgress(.importingPasswords(numberOfPasswords: nil, fraction: 0.2)) - let loginCredentials = try Self.extractLogins(from: fileContents, defaultColumnPositions: defaultColumnPositions) ?? { + let loginCredentials = try Self.extractLogins(from: fileContents, defaultColumnPositions: defaultColumnPositions, tld: tld) ?? { try Task.checkCancellation() throw LoginImporterError(error: nil, type: .malformedCSV) }() @@ -291,7 +296,7 @@ extension ImportedLoginCredential { extension CSVImporter.ColumnPositions { - func read(_ row: [String]) -> ImportedLoginCredential? { + func read(_ row: [String], tld: TLD, urlMatcher: AutofillDomainNameUrlMatcher) -> ImportedLoginCredential? { let username: String let password: String @@ -316,18 +321,30 @@ extension CSVImporter.ColumnPositions { return nil } + var url: String? = row[safe: urlIndex ?? -1] + var eTldPlusOne: String? + + if let urlString = url { + url = urlMatcher.normalizeUrlForWeb(urlString) + if let normalizedUrl = url { + eTldPlusOne = urlMatcher.extractTLD(domain: URL(string: normalizedUrl)?.host ?? normalizedUrl, tld: tld) ?? normalizedUrl + } + } + return ImportedLoginCredential(title: row[safe: titleIndex ?? -1], - url: row[safe: urlIndex ?? -1], + url: url, + eTldPlusOne: eTldPlusOne, username: username, password: password, notes: row[safe: notesIndex ?? -1]) + } } extension CSVImporter.ColumnPositions? { - func read(_ row: [String]) -> ImportedLoginCredential? { + func read(_ row: [String], tld: TLD, urlMatcher: AutofillDomainNameUrlMatcher) -> ImportedLoginCredential? { let columnPositions = self ?? [ .rowFormatWithTitle, .rowFormatWithoutTitle @@ -335,7 +352,113 @@ extension CSVImporter.ColumnPositions? { row.count > $0.maximumIndex }) - return columnPositions?.read(row) + return columnPositions?.read(row, tld: tld, urlMatcher: urlMatcher) } } + +extension Array where Element == ImportedLoginCredential { + + func removeDuplicates() -> [ImportedLoginCredential] { + // First, group credentials by their identifying key + var credentialGroups: [String: [ImportedLoginCredential]] = [:] + + forEach { credential in + // special handling for titles with Safari format e.g. "example.com (username)" + let title = titleMatchesSafariFormat(for: credential) ? "SAFARI_TITLE" : credential.title ?? "" + let key = "\(credential.eTldPlusOne ?? "")|" + title + "|" + + "\(credential.username)|\(credential.password)|\(credential.notes ?? "")" + + if credentialGroups[key] == nil { + credentialGroups[key] = [] + } + credentialGroups[key]?.append(credential) + } + + var uniqueCredentials: [ImportedLoginCredential] = [] + + // Process each group + for (_, credentials) in credentialGroups { + // Only process as duplicates if we have multiple credentials with the exact same key + if credentials.count > 1 { + // Among the duplicates, select the one with the highest level TLD + if let selectedCredential = selectPreferredCredential(from: credentials) { + uniqueCredentials.append(selectedCredential) + } + } else if let singleCredential = credentials.first { + // If there's only one credential with this key, it's automatically unique + uniqueCredentials.append(singleCredential) + } + } + + return uniqueCredentials + } + + private func selectPreferredCredential(from credentials: [ImportedLoginCredential]) -> ImportedLoginCredential? { + guard !credentials.isEmpty else { return nil } + + // If there's only one credential, return it + if credentials.count == 1 { + return credentials[0] + } + + // First, try to find a credential without subdomains (e.g. site.com or site.co.uk) + if let noSubdomainCredential = credentials.first(where: { credential in + guard let url = credential.url, let eTldPlusOne = credential.eTldPlusOne else { return false } + // The URL should match the TLD exactly (meaning it's the base domain without subdomains) + return url == eTldPlusOne + }) { + return noSubdomainCredential + } + + // Look for www subdomain if no bare domain exists + if let wwwCredential = credentials.first(where: { credential in + guard let url = credential.url, let eTldPlusOne = credential.eTldPlusOne else { return false } + let components = url.split(separator: ".") + // Check first component is www AND rest matches TLD + return components.first == "www" && components.dropFirst().joined(separator: ".") == eTldPlusOne + }) { + return wwwCredential + } + + // If neither bare domain nor www exists, sort remaining by: + // 1. Number of segments (fewer is better) + // 2. Alphabetically by domain + return credentials.min { credential1, credential2 in + let segments1 = getDomainSegments(from: credential1.url) + let segments2 = getDomainSegments(from: credential2.url) + + if segments1 != segments2 { + return segments1 < segments2 + } + + // If segment counts are equal, compare URLs alphabetically + let url1 = credential1.url ?? "" + let url2 = credential2.url ?? "" + return url1 < url2 + } + } + + private func getDomainSegments(from url: String?) -> Int { + guard let url = url else { return Int.max } + return url.split(separator: ".").count + } + + private func titleMatchesSafariFormat(for credential: ImportedLoginCredential) -> Bool { + guard let title = credential.title, let url = credential.url else { return false } + + let components = title.components(separatedBy: " (") + guard components.count == 2, + components[1].hasSuffix(")"), + let username = components[1].dropLast().toString else { + return false + } + + return url.contains(components[0]) && username == credential.username + } + +} + +extension StringProtocol { + var toString: String? { String(self) } +} diff --git a/Sources/BrowserServicesKit/DataImport/Logins/LoginImport.swift b/Sources/BrowserServicesKit/DataImport/Logins/LoginImport.swift index f3c8c3749..c3e4d71c7 100644 --- a/Sources/BrowserServicesKit/DataImport/Logins/LoginImport.swift +++ b/Sources/BrowserServicesKit/DataImport/Logins/LoginImport.swift @@ -23,13 +23,15 @@ public struct ImportedLoginCredential: Equatable { public let title: String? public let url: String? + public let eTldPlusOne: String? public let username: String public let password: String public let notes: String? - public init(title: String? = nil, url: String?, username: String, password: String, notes: String? = nil) { + public init(title: String? = nil, url: String?, eTldPlusOne: String? = nil, username: String, password: String, notes: String? = nil) { self.title = title self.url = url.flatMap(URL.init(string:))?.host ?? url // Try to use the host if possible, as the Secure Vault saves credentials using the host. + self.eTldPlusOne = eTldPlusOne self.username = username self.password = password self.notes = notes diff --git a/Sources/BrowserServicesKit/SecureVault/SecureVaultModels.swift b/Sources/BrowserServicesKit/SecureVault/SecureVaultModels.swift index 2ba635f1f..b52b79380 100644 --- a/Sources/BrowserServicesKit/SecureVault/SecureVaultModels.swift +++ b/Sources/BrowserServicesKit/SecureVault/SecureVaultModels.swift @@ -545,7 +545,7 @@ extension Array where Element == SecureVaultModels.WebsiteAccount { public func sortedForDomain(_ targetDomain: String, tld: TLD, removeDuplicates: Bool = false, urlMatcher: AutofillDomainNameUrlMatcher = AutofillDomainNameUrlMatcher()) -> [SecureVaultModels.WebsiteAccount] { - guard let targetTLD = extractTLD(domain: targetDomain, tld: tld, urlMatcher: urlMatcher) else { + guard let targetTLD = urlMatcher.extractTLD(domain: targetDomain, tld: tld) else { return [] } @@ -601,13 +601,6 @@ extension Array where Element == SecureVaultModels.WebsiteAccount { return deduplicatedAccounts.sorted { compareAccount($0, $1) } } - private func extractTLD(domain: String, tld: TLD, urlMatcher: AutofillDomainNameUrlMatcher) -> String? { - guard var urlComponents = urlMatcher.normalizeSchemeForAutofill(domain) else { return nil } - guard urlComponents.host != .localhost else { return domain } - return urlComponents.eTLDplus1WithPort(tld: tld) - - } - // Last Used > Last Updated > Alphabetical Domain > Alphabetical Username > Empty Usernames private func compareAccount(_ account1: SecureVaultModels.WebsiteAccount, _ account2: SecureVaultModels.WebsiteAccount) -> Bool { let username1 = account1.username ?? ""