Skip to content

Commit c3b2c4a

Browse files
authored
Add ability to parse a Takeout zip file and extract+import bookmarks (#6820)
Task/Issue URL: https://app.asana.com/1/137249556945/project/608920331025315/task/1211197010246723?focus=true

### Description
Adds the ability to parse a Google Takeout zip file containing exported bookmarks. This PR only adds the underlying logic and dev settings to test it; the prod UI isn't changed.

### Steps to test this PR

### Importing a valid file
- [x] Generate a Takeout zip file containing bookmarks. See "Instructions for generating a Takeout zip file" below.
- [x] Fresh install of the `internal` build type
- [x] Launch `Autofill Dev Settings`, and scroll to the `Import Bookmarks` dev settings 🆕
- [x] Tap `Choose zip file (downloaded from Takeout)` and choose the Takeout zip; verify the bookmarks were imported using the `View Bookmarks` button

### Importing an invalid file
- [x] Tap `Choose zip file (downloaded from Takeout)` and choose a random other file (either not a Takeout zip or one that doesn't contain bookmarks) and verify it is simply reported as invalid in a snackbar

#### Instructions for generating a Takeout zip file
Tap `Launch Google Takeout (normal tab)` from `Autofill Dev Settings` and use this to generate a zip file for testing:
- [ ] Sign in
- [ ] Tap on `Deselect All`
- [ ] Scroll down to `Chrome`, and check it
- [ ] Optionally, tap on `All Chrome data included` and choose only bookmarks
- [ ] Scroll all the way down to the bottom and tap `Next step`
- [ ] Leave all settings as is, and choose `Create export`
- [ ] Wait a few seconds, then refresh the page until `Your latest export` shows the export you just created (at the top of the page)
- [ ] Tap `Download` and confirm you want to save it
1 parent f3d2f9c commit c3b2c4a

File tree

19 files changed

+695
-12
lines changed

19 files changed

+695
-12
lines changed

app/src/test/java/com/duckduckgo/app/bookmarks/model/SavedSitesParserTest.kt

Lines changed: 34 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ import com.duckduckgo.savedsites.api.models.SavedSite.Bookmark
3232
import com.duckduckgo.savedsites.api.models.SavedSite.Favorite
3333
import com.duckduckgo.savedsites.api.models.SavedSitesNames
3434
import com.duckduckgo.savedsites.api.models.TreeNode
35+
import com.duckduckgo.savedsites.api.service.SavedSitesImporter.ImportFolder
3536
import com.duckduckgo.savedsites.impl.MissingEntitiesRelationReconciler
3637
import com.duckduckgo.savedsites.impl.RealFavoritesDelegate
3738
import com.duckduckgo.savedsites.impl.RealSavedSitesRepository
@@ -156,7 +157,7 @@ class SavedSitesParserTest {
156157
val inputStream = FileUtilities.loadResource(javaClass.classLoader!!, "bookmarks/bookmarks_invalid.html")
157158
val document = Jsoup.parse(inputStream, Charsets.UTF_8.name(), "duckduckgo.com")
158159

159-
val bookmarks = parser.parseHtml(document, repository)
160+
val bookmarks = parser.parseHtml(document, repository, ImportFolder.Root)
160161

161162
assertTrue(bookmarks.isEmpty())
162163
}
@@ -166,7 +167,7 @@ class SavedSitesParserTest {
166167
val inputStream = FileUtilities.loadResource(javaClass.classLoader!!, "bookmarks/bookmarks_firefox.html")
167168
val document = Jsoup.parse(inputStream, Charsets.UTF_8.name(), "duckduckgo.com")
168169

169-
val bookmarks = parser.parseHtml(document, repository).filterIsInstance<Bookmark>()
170+
val bookmarks = parser.parseHtml(document, repository, ImportFolder.Root).filterIsInstance<Bookmark>()
170171

171172
assertEquals(17, bookmarks.size)
172173

@@ -185,7 +186,7 @@ class SavedSitesParserTest {
185186
val inputStream = FileUtilities.loadResource(javaClass.classLoader!!, "bookmarks/bookmarks_brave.html")
186187
val document = Jsoup.parse(inputStream, Charsets.UTF_8.name(), "duckduckgo.com")
187188

188-
val bookmarks = parser.parseHtml(document, repository).filterIsInstance<Bookmark>()
189+
val bookmarks = parser.parseHtml(document, repository, ImportFolder.Root).filterIsInstance<Bookmark>()
189190

190191
assertEquals(12, bookmarks.size)
191192

@@ -206,7 +207,7 @@ class SavedSitesParserTest {
206207
val inputStream = FileUtilities.loadResource(javaClass.classLoader!!, "bookmarks/bookmarks_chrome.html")
207208
val document = Jsoup.parse(inputStream, Charsets.UTF_8.name(), "duckduckgo.com")
208209

209-
val bookmarks = parser.parseHtml(document, repository).filterIsInstance<Bookmark>()
210+
val bookmarks = parser.parseHtml(document, repository, ImportFolder.Root).filterIsInstance<Bookmark>()
210211

211212
assertEquals(12, bookmarks.size)
212213

@@ -228,7 +229,7 @@ class SavedSitesParserTest {
228229
val inputStream = FileUtilities.loadResource(javaClass.classLoader!!, "bookmarks/bookmarks_ddg_android.html")
229230
val document = Jsoup.parse(inputStream, Charsets.UTF_8.name(), "duckduckgo.com")
230231

231-
val bookmarks = parser.parseHtml(document, repository).filterNot { it is BookmarkFolder }
232+
val bookmarks = parser.parseHtml(document, repository, ImportFolder.Root).filterNot { it is BookmarkFolder }
232233

233234
assertEquals(13, bookmarks.size)
234235

@@ -250,7 +251,7 @@ class SavedSitesParserTest {
250251
val inputStream = FileUtilities.loadResource(javaClass.classLoader!!, "bookmarks/bookmarks_ddg_macos.html")
251252
val document = Jsoup.parse(inputStream, Charsets.UTF_8.name(), "duckduckgo.com")
252253

253-
val bookmarks = parser.parseHtml(document, repository).filterIsInstance<Bookmark>()
254+
val bookmarks = parser.parseHtml(document, repository, ImportFolder.Root).filterIsInstance<Bookmark>()
254255

255256
assertEquals(13, bookmarks.size)
256257

@@ -273,7 +274,7 @@ class SavedSitesParserTest {
273274
val inputStream = FileUtilities.loadResource(javaClass.classLoader!!, "bookmarks/bookmarks_safari.html")
274275
val document = Jsoup.parse(inputStream, Charsets.UTF_8.name(), "duckduckgo.com")
275276

276-
val bookmarks = parser.parseHtml(document, repository).filterIsInstance<Bookmark>()
277+
val bookmarks = parser.parseHtml(document, repository, ImportFolder.Root).filterIsInstance<Bookmark>()
277278

278279
assertEquals(14, bookmarks.size)
279280

@@ -295,7 +296,7 @@ class SavedSitesParserTest {
295296
val inputStream = FileUtilities.loadResource(javaClass.classLoader!!, "bookmarks/bookmarks_favorites_ddg.html")
296297
val document = Jsoup.parse(inputStream, Charsets.UTF_8.name(), "duckduckgo.com")
297298

298-
val savedSites = parser.parseHtml(document, repository)
299+
val savedSites = parser.parseHtml(document, repository, ImportFolder.Root)
299300

300301
val favorites = savedSites.filterIsInstance<Favorite>()
301302
val bookmarks = savedSites.filterIsInstance<Bookmark>()
@@ -367,4 +368,29 @@ class SavedSitesParserTest {
367368
assertEquals(3, favoritesLists.size)
368369
assertEquals(9, bookmarks.size)
369370
}
371+
372+
@Test
fun canImportBookmarksWithDestinationFolder() = runTest {
    val destinationName = "Imported Bookmarks"
    val chromeExport = FileUtilities.loadResource(javaClass.classLoader!!, "bookmarks/bookmarks_chrome.html")
    val document = Jsoup.parse(chromeExport, Charsets.UTF_8.name(), "duckduckgo.com")

    // Parse the Chrome export, asking for everything to land in a named folder.
    val importedBookmarks = parser
        .parseHtml(document, repository, ImportFolder.Folder(destinationName))
        .filterIsInstance<Bookmark>()
    assertTrue("Should import bookmarks with folder destination", importedBookmarks.isNotEmpty())

    // The named folder must now exist under the bookmarks root (folders have no URL).
    val matchingFolders = repository.getFolderTreeItems(SavedSitesNames.BOOKMARKS_ROOT)
        .filter { item -> item.url == null && item.name == destinationName }
    assertTrue("Should create destination folder in repository", matchingFolders.isNotEmpty())

    // At least one imported bookmark must have that folder as its parent.
    val destinationFolderId = matchingFolders.first().id
    assertTrue(
        "Bookmarks should be placed in destination folder",
        importedBookmarks.any { bookmark -> bookmark.parentId == destinationFolderId },
    )
}
370396
}

autofill/autofill-impl/build.gradle

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ dependencies {
4747
testImplementation project(':feature-toggles-test')
4848
implementation project(path: ':settings-api') // temporary until we release new settings
4949
implementation project(':library-loader-api')
50+
implementation project(':saved-sites-api')
5051

5152
anvil project(path: ':anvil-compiler')
5253
implementation project(path: ':anvil-annotations')
Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
/*
2+
* Copyright (c) 2025 DuckDuckGo
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package com.duckduckgo.autofill.impl.importing.takeout.processor
18+
19+
import android.net.Uri
20+
import com.duckduckgo.common.utils.DispatcherProvider
21+
import com.duckduckgo.di.scopes.AppScope
22+
import com.duckduckgo.savedsites.api.service.ImportSavedSitesResult
23+
import com.duckduckgo.savedsites.api.service.SavedSitesImporter
24+
import com.duckduckgo.savedsites.api.service.SavedSitesImporter.ImportFolder
25+
import com.squareup.anvil.annotations.ContributesBinding
26+
import java.io.File
27+
import javax.inject.Inject
28+
import kotlinx.coroutines.withContext
29+
30+
/**
 * Imports bookmarks supplied as HTML into a chosen destination.
 *
 * The destination is expressed as an [ImportFolder]: either the bookmarks root or a named folder.
 */
interface TakeoutBookmarkImporter {

    /**
     * Imports the bookmarks contained in [htmlContent] into [destination].
     *
     * @param htmlContent the bookmark HTML to import (Netscape bookmark file format)
     * @param destination where the bookmarks should be placed ([ImportFolder.Root] or a named [ImportFolder.Folder])
     * @return an [ImportSavedSitesResult] describing either the successful import or the error
     */
    suspend fun importBookmarks(
        htmlContent: String,
        destination: ImportFolder,
    ): ImportSavedSitesResult
}
44+
45+
/**
 * [TakeoutBookmarkImporter] that delegates to [SavedSitesImporter].
 *
 * [SavedSitesImporter] is given a file URI, so the HTML is first written to a temporary file
 * which is deleted again once the import call has finished (successfully or not).
 */
@ContributesBinding(AppScope::class)
class RealTakeoutBookmarkImporter @Inject constructor(
    private val savedSitesImporter: SavedSitesImporter,
    private val dispatchers: DispatcherProvider,
) : TakeoutBookmarkImporter {

    /**
     * Runs the import on the IO dispatcher.
     *
     * @param htmlContent the bookmark HTML to import
     * @param destination where the imported bookmarks should be placed
     * @return the result reported by [SavedSitesImporter], or [ImportSavedSitesResult.Error] wrapping
     * any exception raised while creating/writing the temporary file or importing
     */
    override suspend fun importBookmarks(htmlContent: String, destination: ImportFolder): ImportSavedSitesResult {
        return withContext(dispatchers.io()) {
            import(htmlContent = htmlContent, destination = destination)
        }
    }

    /**
     * Writes [htmlContent] to a temporary `.html` file, imports it, and always removes the file afterwards.
     */
    private suspend fun import(htmlContent: String, destination: ImportFolder): ImportSavedSitesResult {
        return try {
            // saved sites importer needs a file uri, so we create a temp file here
            val tempFile = File.createTempFile("bookmark_import_", ".html")
            try {
                tempFile.writeText(htmlContent)
                savedSitesImporter.import(Uri.fromFile(tempFile), destination)
            } finally {
                // delete the temp file after import
                tempFile.takeIf { it.exists() }?.delete()
            }
        } catch (cancellation: kotlin.coroutines.cancellation.CancellationException) {
            // Cancellation is not an import failure: let it propagate so the calling coroutine
            // is cancelled instead of receiving an Error result.
            throw cancellation
        } catch (exception: Exception) {
            ImportSavedSitesResult.Error(exception)
        }
    }
}
Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
/*
2+
* Copyright (c) 2025 DuckDuckGo
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package com.duckduckgo.autofill.impl.importing.takeout.zip
18+
19+
import android.content.Context
20+
import android.net.Uri
21+
import com.duckduckgo.autofill.impl.importing.takeout.zip.TakeoutBookmarkExtractor.ExtractionResult
22+
import com.duckduckgo.autofill.impl.importing.takeout.zip.ZipEntryContentReader.ReadResult
23+
import com.duckduckgo.common.utils.DispatcherProvider
24+
import com.duckduckgo.di.scopes.AppScope
25+
import com.squareup.anvil.annotations.ContributesBinding
26+
import java.util.zip.ZipEntry
27+
import java.util.zip.ZipInputStream
28+
import javax.inject.Inject
29+
import kotlinx.coroutines.withContext
30+
import logcat.LogPriority.WARN
31+
import logcat.logcat
32+
33+
/**
 * Extracts the bookmarks HTML document from a Google Takeout ZIP file.
 */
interface TakeoutBookmarkExtractor {

    /** Outcome of an extraction attempt. */
    sealed class ExtractionResult {

        /**
         * The bookmarks HTML was found; [bookmarkHtmlContent] holds its text.
         *
         * `toString` returns a fixed label rather than the generated data-class representation,
         * so the HTML content is never part of the string form (presumably to keep it out of logs).
         */
        data class Success(val bookmarkHtmlContent: String) : ExtractionResult() {
            override fun toString(): String = "ExtractionResult=success"
        }

        /** The extraction failed; [exception] describes why. */
        data class Error(val exception: Exception) : ExtractionResult()
    }

    /**
     * Extracts the bookmark HTML content from the Google Takeout ZIP file at [fileUri].
     *
     * @param fileUri the URI of the Google Takeout ZIP file containing the bookmarks
     * @return [ExtractionResult.Success] with the bookmark HTML content, or [ExtractionResult.Error]
     */
    suspend fun extractBookmarksHtml(fileUri: Uri): ExtractionResult
}
51+
52+
/**
 * [TakeoutBookmarkExtractor] that walks the entries of a ZIP archive, picks the first non-directory
 * entry whose name ends with `Chrome/Bookmarks.html` (case-insensitive) and hands it to a
 * [ZipEntryContentReader] for reading and validation.
 */
@ContributesBinding(AppScope::class)
class TakeoutZipBookmarkExtractor @Inject constructor(
    private val context: Context,
    private val dispatchers: DispatcherProvider,
    private val zipEntryContentReader: ZipEntryContentReader,
) : TakeoutBookmarkExtractor {

    /**
     * Opens [fileUri] via the content resolver on the IO dispatcher and scans it as a ZIP archive.
     *
     * Anything thrown while opening or scanning is wrapped in a new [Exception] and returned as
     * [ExtractionResult.Error] rather than propagated.
     */
    override suspend fun extractBookmarksHtml(fileUri: Uri): ExtractionResult {
        return withContext(dispatchers.io()) {
            runCatching { openAndScan(fileUri) }
                .getOrElse { failure -> ExtractionResult.Error(Exception(failure)) }
        }
    }

    /** Opens the stream behind [fileUri] and scans it; both streams are closed before returning. */
    private fun openAndScan(fileUri: Uri): ExtractionResult {
        val rawStream = context.contentResolver.openInputStream(fileUri)
            ?: return ExtractionResult.Error(Exception("Unable to open file: $fileUri"))

        return rawStream.use { inputStream ->
            ZipInputStream(inputStream).use { zipInputStream -> processZipEntries(zipInputStream) }
        }
    }

    /**
     * Advances through [zipInputStream] one entry at a time until a bookmark entry is found,
     * then reads that entry while the stream is still positioned on it.
     */
    private fun processZipEntries(zipInputStream: ZipInputStream): ExtractionResult {
        val firstEntry = zipInputStream.nextEntry
        if (firstEntry == null) {
            logcat(WARN) { "No entries found in ZIP stream" }
            return ExtractionResult.Error(Exception("Invalid or empty ZIP file"))
        }

        // Lazy: the next entry is only requested after the current one has been rejected,
        // so the stream stays positioned on the matching entry when the search stops.
        val bookmarkEntry = generateSequence(firstEntry) { zipInputStream.nextEntry }
            .firstOrNull { candidate ->
                logcat { "Processing zip entry '${candidate.name}'" }
                isBookmarkEntry(candidate)
            }
            ?: return ExtractionResult.Error(Exception("Chrome/Bookmarks.html not found in file"))

        return when (val readResult = zipEntryContentReader.readAndValidateContent(zipInputStream, bookmarkEntry.name)) {
            is ReadResult.Success -> ExtractionResult.Success(readResult.content)
            is ReadResult.Error -> ExtractionResult.Error(readResult.exception)
        }
    }

    /** True for a file (not directory) entry whose path ends with the expected bookmarks file name. */
    private fun isBookmarkEntry(entry: ZipEntry): Boolean {
        if (entry.isDirectory) return false
        return entry.name.endsWith(EXPECTED_BOOKMARKS_FILENAME, ignoreCase = true)
    }

    companion object {
        private const val EXPECTED_BOOKMARKS_FILENAME = "Chrome/Bookmarks.html"
    }
}
Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
/*
2+
* Copyright (c) 2025 DuckDuckGo
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package com.duckduckgo.autofill.impl.importing.takeout.zip
18+
19+
import com.duckduckgo.di.scopes.AppScope
20+
import com.squareup.anvil.annotations.ContributesBinding
21+
import java.util.zip.ZipInputStream
22+
import javax.inject.Inject
23+
import logcat.logcat
24+
25+
/**
 * Reads the text of a single ZIP entry and checks that it is acceptable content.
 */
interface ZipEntryContentReader {

    /** Outcome of reading and validating an entry. */
    sealed class ReadResult {

        /** The entry was read and accepted; [content] is its text. */
        data class Success(val content: String) : ReadResult()

        /** Reading or validation failed; [exception] describes why. */
        data class Error(val exception: Exception) : ReadResult()
    }

    /**
     * Reads the entry that [zipInputStream] is currently positioned on and validates its content.
     *
     * @param zipInputStream the stream to read the entry's content from
     * @param entryName the name of the entry being read
     * @return [ReadResult.Success] with the content, or [ReadResult.Error]
     */
    fun readAndValidateContent(zipInputStream: ZipInputStream, entryName: String): ReadResult
}
37+
38+
/**
 * [ZipEntryContentReader] that reads an entry as UTF-8 text and accepts it when it looks like a
 * Netscape-format bookmarks file (see [isValidBookmarkContent]).
 */
@ContributesBinding(AppScope::class)
class BookmarkZipEntryContentReader @Inject constructor() : ZipEntryContentReader {

    /**
     * Reads the current entry of [zipInputStream] and validates it as bookmark HTML.
     *
     * The stream is left open; closing it remains the caller's responsibility.
     *
     * @param zipInputStream stream positioned on the entry to read
     * @param entryName name of the entry, used for logging only
     * @return [ZipEntryContentReader.ReadResult.Success] with the text when validation passes, otherwise
     * [ZipEntryContentReader.ReadResult.Error] (validation failure or any exception raised while reading)
     */
    override fun readAndValidateContent(
        zipInputStream: ZipInputStream,
        entryName: String,
    ): ZipEntryContentReader.ReadResult {
        logcat { "Reading content from ZIP entry: '$entryName'" }

        return try {
            val content = readContent(zipInputStream, entryName)

            if (isValidBookmarkContent(content)) {
                logcat { "Content validation passed for: '$entryName'" }
                ZipEntryContentReader.ReadResult.Success(content)
            } else {
                logcat { "Content validation failed for: '$entryName'" }
                ZipEntryContentReader.ReadResult.Error(
                    Exception("File content is not a valid bookmark file"),
                )
            }
        } catch (e: Exception) {
            logcat { "Error reading ZIP entry content: ${e.message}" }
            ZipEntryContentReader.ReadResult.Error(e)
        }
    }

    /**
     * Reads the remainder of the current entry as UTF-8 text.
     */
    private fun readContent(zipInputStream: ZipInputStream, entryName: String): String {
        // Deliberately no `use { }` here: the stream belongs to the caller, and closing the reader
        // would close the underlying ZipInputStream and break any further entry iteration.
        val content = zipInputStream.reader(Charsets.UTF_8).readText()
        logcat { "Read content from '$entryName', length: ${content.length}" }
        return content
    }

    /**
     * Accepts content that contains either the Netscape bookmark doctype prefix or the
     * `<title>Bookmarks</title>` element (both matched case-insensitively).
     */
    private fun isValidBookmarkContent(content: String): Boolean {
        val hasNetscapeHeader = content.contains(NETSCAPE_HEADER, ignoreCase = true)
        val hasBookmarkTitle = content.contains(BOOKMARK_TITLE, ignoreCase = true)

        logcat { "Content validation: hasNetscapeHeader=$hasNetscapeHeader, hasBookmarkTitle=$hasBookmarkTitle" }

        return hasNetscapeHeader || hasBookmarkTitle
    }

    companion object {
        private const val NETSCAPE_HEADER = "<!DOCTYPE NETSCAPE-Bookmark-file"
        private const val BOOKMARK_TITLE = "<title>Bookmarks</title>"
    }
}

autofill/autofill-impl/src/main/res/values/donottranslate.xml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,4 +16,8 @@
1616

1717
<resources>
1818

19+
<!-- Import Bookmarks Strings -->
20+
<string name="autofillImportBookmarksChromeFolderName">Imported from Chrome</string>
21+
22+
1923
</resources>

0 commit comments

Comments
 (0)