Skip to content

Commit 64485a5

Browse files
authored
Improve algorithm to generate a filename from an image download (#896)
* Improve algorithm used to guess filenames for downloaded files * Tidy extraction logic and add additional test cases * Break evaluation earlier and add new test * Return .bin if no other filetype available * Remove unused constructor param * Add additional test * Removed confusing extension function from pendingDownload * Remove test that can be OS-dependent
1 parent ea3eef2 commit 64485a5

File tree

5 files changed

+260
-11
lines changed

5 files changed

+260
-11
lines changed
Lines changed: 152 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,152 @@
1+
/*
2+
* Copyright (c) 2020 DuckDuckGo
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package com.duckduckgo.app.browser.downloader
18+
19+
import org.junit.Assert.assertEquals
20+
import org.junit.Test
21+
22+
class UriUtilsFilenameExtractorTest {
23+
24+
private val testee: FilenameExtractor = FilenameExtractor()
25+
26+
@Test
27+
fun whenUrlEndsWithFilenameAsJpgNoMimeOrContentDispositionThenFilenameShouldBeExtracted() {
28+
val url = "https://example.com/realFilename.jpg"
29+
val mimeType: String? = null
30+
val contentDisposition: String? = null
31+
val extracted = testee.extract(buildPendingDownload(url, contentDisposition, mimeType))
32+
assertEquals("realFilename.jpg", extracted)
33+
}
34+
35+
@Test
36+
fun whenUrlContainsFilenameButContainsAdditionalPathSegmentsThenFilenameShouldBeExtracted() {
37+
val url = "https://foo.example.com/path/images/b/b1/realFilename.jpg/other/stuff"
38+
val mimeType: String? = null
39+
val contentDisposition: String? = null
40+
val extracted = testee.extract(buildPendingDownload(url, contentDisposition, mimeType))
41+
assertEquals("realFilename.jpg", extracted)
42+
}
43+
44+
@Test
45+
fun whenUrlContainsFilenameButContainsAdditionalPathSegmentsAndQueryParamsThenFilenameShouldBeExtracted() {
46+
val url = "https://foo.example.com/path/images/b/b1/realFilename.jpg/other/stuff?cb=123"
47+
val mimeType: String? = null
48+
val contentDisposition: String? = null
49+
val extracted = testee.extract(buildPendingDownload(url, contentDisposition, mimeType))
50+
assertEquals("realFilename.jpg", extracted)
51+
}
52+
53+
@Test
54+
fun whenUrlContainsFilenameButContainsAdditionalPathSegmentsAndQueryParamsWhichLookLikeAFilenameThenFilenameShouldBeExtracted() {
55+
val url = "https://foo.example.com/path/images/b/b1/realFilename.jpg/other/stuff?cb=123.com"
56+
val mimeType: String? = null
57+
val contentDisposition: String? = null
58+
val extracted = testee.extract(buildPendingDownload(url, contentDisposition, mimeType))
59+
assertEquals("realFilename.jpg", extracted)
60+
}
61+
62+
@Test
63+
fun whenUrlContainsFilenameButContentDispositionSaysOtherwiseThenExtractFromContentDisposition() {
64+
val url = "https://example.com/filename.jpg"
65+
val mimeType: String? = null
66+
val contentDisposition: String? = "Content-Disposition: attachment; filename=fromDisposition.jpg"
67+
val extracted = testee.extract(buildPendingDownload(url, contentDisposition, mimeType))
68+
assertEquals("fromDisposition.jpg", extracted)
69+
}
70+
71+
@Test
72+
fun whenFilenameEndsInBinThenThatIsExtracted() {
73+
val url = "https://example.com/realFilename.bin"
74+
val mimeType: String? = null
75+
val contentDisposition: String? = null
76+
val extracted = testee.extract(buildPendingDownload(url, contentDisposition, mimeType))
77+
assertEquals("realFilename.bin", extracted)
78+
}
79+
80+
@Test
81+
fun whenUrlContainsNoFileNameButLotsOfPathsSegmentsThenFirstSegmentNameIsUsed() {
82+
val url = "https://example.com/foo/bar/files"
83+
val mimeType: String? = null
84+
val contentDisposition: String? = null
85+
val extracted = testee.extract(buildPendingDownload(url, contentDisposition, mimeType))
86+
assertEquals("foo.bin", extracted)
87+
}
88+
89+
@Test
90+
fun whenFilenameEndsInBinWithASlashThenThatIsExtracted() {
91+
val url = "https://example.com/realFilename.bin/"
92+
val mimeType: String? = null
93+
val contentDisposition: String? = null
94+
val extracted = testee.extract(buildPendingDownload(url, contentDisposition, mimeType))
95+
assertEquals("realFilename.bin", extracted)
96+
}
97+
98+
@Test
99+
fun whenFilenameContainsBinThenThatIsExtracted() {
100+
val url = "https://example.com/realFilename.bin/foo/bar"
101+
val mimeType: String? = null
102+
val contentDisposition: String? = null
103+
val extracted = testee.extract(buildPendingDownload(url, contentDisposition, mimeType))
104+
assertEquals("realFilename.bin", extracted)
105+
}
106+
107+
@Test
108+
fun whenUrlIsEmptyStringAndNoOtherDataProvidedThenDefaultNameAndBinFiletypeReturned() {
109+
val url = ""
110+
val mimeType: String? = null
111+
val contentDisposition: String? = null
112+
val extracted = testee.extract(buildPendingDownload(url, contentDisposition, mimeType))
113+
assertEquals("downloadfile.bin", extracted)
114+
}
115+
116+
@Test
117+
fun whenUrlIsEmptyStringAndMimeTypeProvidedThenDefaultNameAndFiletypeFromMimeReturned() {
118+
val url = ""
119+
val mimeType: String? = "image/jpeg"
120+
val contentDisposition: String? = null
121+
val extracted = testee.extract(buildPendingDownload(url, contentDisposition, mimeType))
122+
assertEquals("downloadfile.jpg", extracted)
123+
}
124+
125+
@Test
126+
fun whenUrlIsEmptyStringAndContentDispositionProvidedThenExtractFromContentDisposition() {
127+
val url = ""
128+
val mimeType: String? = null
129+
val contentDisposition: String? = "Content-Disposition: attachment; filename=fromDisposition.jpg"
130+
val extracted = testee.extract(buildPendingDownload(url, contentDisposition, mimeType))
131+
assertEquals("fromDisposition.jpg", extracted)
132+
}
133+
134+
@Test
135+
fun whenNoFilenameAndNoPathSegmentsThenDomainNameReturned() {
136+
val url = "http://example.com"
137+
val mimeType: String? = null
138+
val contentDisposition: String? = null
139+
val extracted = testee.extract(buildPendingDownload(url, contentDisposition, mimeType))
140+
assertEquals("example.com", extracted)
141+
}
142+
143+
private fun buildPendingDownload(url: String, contentDisposition: String?, mimeType: String?): FileDownloader.PendingFileDownload {
144+
return FileDownloader.PendingFileDownload(
145+
url = url,
146+
contentDisposition = contentDisposition,
147+
mimeType = mimeType,
148+
subfolder = "aFolder",
149+
userAgent = "aUserAgent"
150+
)
151+
}
152+
}

app/src/main/java/com/duckduckgo/app/browser/DownloadConfirmationFragment.kt

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ import android.view.LayoutInflater
2222
import android.view.View
2323
import android.view.ViewGroup
2424
import com.duckduckgo.app.browser.downloader.FileDownloader.PendingFileDownload
25-
import com.duckduckgo.app.browser.downloader.guessFileName
25+
import com.duckduckgo.app.browser.downloader.FilenameExtractor
2626
import com.duckduckgo.app.browser.downloader.isDataUrl
2727
import com.duckduckgo.app.global.view.gone
2828
import com.duckduckgo.app.global.view.leftDrawable
@@ -32,12 +32,16 @@ import dagger.android.support.AndroidSupportInjection
3232
import kotlinx.android.synthetic.main.download_confirmation.view.*
3333
import timber.log.Timber
3434
import java.io.File
35+
import javax.inject.Inject
3536

3637
class DownloadConfirmationFragment : BottomSheetDialogFragment() {
3738

3839
val listener: DownloadConfirmationDialogListener
3940
get() = parentFragment as DownloadConfirmationDialogListener
4041

42+
@Inject
43+
lateinit var filenameExtractor: FilenameExtractor
44+
4145
private var file: File? = null
4246

4347
private val pendingDownload: PendingFileDownload by lazy {
@@ -57,7 +61,7 @@ class DownloadConfirmationFragment : BottomSheetDialogFragment() {
5761
}
5862

5963
private fun setupDownload() {
60-
file = if (!pendingDownload.isDataUrl) File(pendingDownload.directory, pendingDownload.guessFileName()) else null
64+
file = if (!pendingDownload.isDataUrl) File(pendingDownload.directory, filenameExtractor.extract(pendingDownload)) else null
6165
}
6266

6367
private fun setupViews(view: View) {

app/src/main/java/com/duckduckgo/app/browser/downloader/FileDownloader.kt

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,6 @@ import android.webkit.URLUtil
2222
import androidx.annotation.WorkerThread
2323
import com.duckduckgo.app.browser.downloader.FileDownloader.FileDownloadListener
2424
import com.duckduckgo.app.browser.downloader.FileDownloader.PendingFileDownload
25-
import timber.log.Timber
2625
import java.io.File
2726
import java.io.Serializable
2827
import javax.inject.Inject
@@ -67,12 +66,6 @@ class AndroidFileDownloader @Inject constructor(
6766
}
6867
}
6968

70-
fun PendingFileDownload.guessFileName(): String {
71-
val guessedFileName = URLUtil.guessFileName(url, contentDisposition, mimeType)
72-
Timber.i("Guessed filename of $guessedFileName for url $url")
73-
return guessedFileName
74-
}
75-
7669
val PendingFileDownload.isDataUrl get() = URLUtil.isDataUrl(url)
7770

7871
val PendingFileDownload.isNetworkUrl get() = URLUtil.isNetworkUrl(url)
Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
/*
2+
* Copyright (c) 2020 DuckDuckGo
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package com.duckduckgo.app.browser.downloader
18+
19+
import android.webkit.URLUtil
20+
import androidx.core.net.toUri
21+
import com.duckduckgo.app.browser.downloader.FileDownloader.PendingFileDownload
22+
import com.duckduckgo.app.browser.downloader.FilenameExtractor.GuessQuality.NotGoodEnough
23+
import com.duckduckgo.app.browser.downloader.FilenameExtractor.GuessQuality.TriedAllOptions
24+
import timber.log.Timber
25+
import javax.inject.Inject
26+
27+
class FilenameExtractor @Inject constructor() {
28+
29+
fun extract(pendingDownload: PendingFileDownload): String {
30+
val url = pendingDownload.url
31+
val mimeType = pendingDownload.mimeType
32+
val contentDisposition = pendingDownload.contentDisposition
33+
34+
val firstGuess = guessFilename(url, contentDisposition, mimeType)
35+
val guesses = Guesses(bestGuess = null, latestGuess = firstGuess)
36+
val baseUrl = url.toUri().host ?: ""
37+
var pathSegments = pathSegments(url)
38+
39+
while (evaluateGuessQuality(guesses, pathSegments) != TriedAllOptions) {
40+
pathSegments = pathSegments.dropLast(1)
41+
guesses.latestGuess = guessFilename(baseUrl + "/" + pathSegments.rebuildUrl(), contentDisposition, mimeType)
42+
}
43+
44+
return bestGuess(guesses)
45+
}
46+
47+
private fun evaluateGuessQuality(guesses: Guesses, pathSegments: List<String>): GuessQuality {
48+
val latestGuess = guesses.latestGuess
49+
50+
// if it contains a '.' then it's a good chance of a filetype and we can update our best guess
51+
if (latestGuess.contains(".")) {
52+
guesses.bestGuess = latestGuess
53+
}
54+
55+
if (pathSegments.size < 2) return TriedAllOptions
56+
57+
return NotGoodEnough
58+
}
59+
60+
private fun guessFilename(url: String, contentDisposition: String?, mimeType: String?): String {
61+
val tidiedUrl = url.removeSuffix("/")
62+
var guessedFilename = URLUtil.guessFileName(tidiedUrl, contentDisposition, mimeType)
63+
64+
// we only want to keep the default .bin filetype on the guess if the URL actually has that too
65+
if (guessedFilename.endsWith(DEFAULT_FILE_TYPE) && !tidiedUrl.endsWith(DEFAULT_FILE_TYPE)) {
66+
guessedFilename = guessedFilename.removeSuffix(DEFAULT_FILE_TYPE)
67+
}
68+
69+
Timber.v("From URL [$url], guessed [$guessedFilename]")
70+
return guessedFilename
71+
}
72+
73+
private fun bestGuess(guesses: Guesses): String {
74+
val guess = guesses.bestGuess ?: guesses.latestGuess
75+
if (!guess.contains(".")) {
76+
return guess + DEFAULT_FILE_TYPE
77+
}
78+
return guess
79+
}
80+
81+
private fun pathSegments(url: String): List<String> {
82+
return url.toUri().pathSegments ?: emptyList()
83+
}
84+
85+
private fun List<String>.rebuildUrl(): String {
86+
return joinToString(separator = "/")
87+
}
88+
89+
companion object {
90+
private const val DEFAULT_FILE_TYPE = ".bin"
91+
}
92+
93+
sealed class GuessQuality {
94+
object NotGoodEnough : GuessQuality()
95+
object TriedAllOptions : GuessQuality()
96+
}
97+
98+
data class Guesses(var latestGuess: String, var bestGuess: String? = null)
99+
100+
}

app/src/main/java/com/duckduckgo/app/browser/downloader/NetworkFileDownloader.kt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ import com.duckduckgo.app.browser.R
2525
import com.duckduckgo.app.browser.downloader.FileDownloader.PendingFileDownload
2626
import javax.inject.Inject
2727

28-
class NetworkFileDownloader @Inject constructor(private val context: Context) {
28+
class NetworkFileDownloader @Inject constructor(private val context: Context, private val filenameExtractor: FilenameExtractor) {
2929

3030
fun download(pendingDownload: PendingFileDownload, callback: FileDownloader.FileDownloadListener) {
3131

@@ -34,7 +34,7 @@ class NetworkFileDownloader @Inject constructor(private val context: Context) {
3434
return
3535
}
3636

37-
val guessedFileName = pendingDownload.guessFileName()
37+
val guessedFileName = filenameExtractor.extract(pendingDownload)
3838

3939
val request = DownloadManager.Request(pendingDownload.url.toUri()).apply {
4040
allowScanningByMediaScanner()

0 commit comments

Comments
 (0)