Skip to content

Commit 1199e90

Browse files
Content-disposition: filename safe, filename* unsafe => use filename (#271)
This is very much an edge case, but it was bugging me. If the filename attribute is safe to use but the filename* attribute is unsafe then we use the filename instead of whatever the fallback would have been.
1 parent e7fe58f commit 1199e90

File tree

3 files changed

+20
-13
lines changed

3 files changed

+20
-13
lines changed

src/Downloads.jl

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -265,10 +265,7 @@ function download(
265265
if output === nothing
266266
try_rename = true
267267
# guess file name from URL (might not be final name)
268-
name = url_filename(url)
269-
if !is_safe_filename(name)
270-
name = DEFAULT_FILENAME
271-
end
268+
name = something(url_filename(url), DEFAULT_FILENAME)
272269
output = joinpath(mktempdir(), name)
273270
end
274271
local response # capture outside closure
@@ -290,7 +287,7 @@ function download(
290287
# fix file suffix based on headers & redirected URL
291288
if try_rename && ispath(path)
292289
name = get_filename(response)
293-
if is_safe_filename(name)
290+
if name !== nothing
294291
path′ = joinpath(dirname(path), name)
295292
@assert dirname(path) == dirname(path′)
296293
if path != path′

src/filenames.jl

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -35,8 +35,11 @@ end
3535

3636
function url_filename(url::AbstractString)
3737
m = match(r"^[a-z][a-z+._-]*://[^#?]*/([^/#?]+)(?:[#?]|$)"i, url)
38-
m === nothing && return
39-
url_unescape(m[1])
38+
if m !== nothing
39+
name = url_unescape(m[1])
40+
is_safe_filename(name) && return name
41+
end
42+
return nothing
4043
end
4144

4245
let # build some complex regular expressions
@@ -109,11 +112,10 @@ function get_filename(response::Response)
109112
end
110113
end
111114
end
112-
filename⁺ !== nothing && return filename⁺
113-
filename !== nothing && return filename
114-
# no usable content disposition header
115-
# extract from URL after redirects
116-
return url_filename(response.url)
115+
filename⁺ !== nothing && is_safe_filename(filename⁺) && return filename⁺
116+
filename !== nothing && is_safe_filename(filename) && return filename
117+
# no usable content disposition header, extract from URL after redirects
118+
return url_filename(response.url) # calls is_safe_filename
117119
end
118120

119121
# Special names on Windows: CON PRN AUX NUL COM1-9 LPT1-9

test/runtests.jl

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -227,7 +227,11 @@ include("setup.jl")
227227
end
228228
for name in file_names,
229229
url in urls_with_filename(name)
230-
@test name === Downloads.url_filename(url)
230+
if Sys.iswindows() && '"' in name
231+
@test nothing === Downloads.url_filename(url)
232+
else
233+
@test name === Downloads.url_filename(url)
234+
end
231235
end
232236
# URLs that we shouldn't get a file name from
233237
for url in [
@@ -316,6 +320,10 @@ include("setup.jl")
316320
url = content_disposition_url(:utf8 => name, :latin1 => name⁺)
317321
@test name⁺ == splitdir(download(url))[2]
318322
end
323+
let name = "valid.txt", name⁺ = "invalid\0.txt"
324+
url = content_disposition_url(:ascii => name, :utf8 => name⁺)
325+
@test name == splitdir(download(url))[2]
326+
end
319327
end
320328
@testset "invalid content disposition" begin
321329
# invalid content disposition header syntax

0 commit comments

Comments
 (0)