Skip to content

Commit e3b3f05

Browse files
lovesegfaultxokdvium
authored andcommitted
fix(nix-prefetch-url): correctly extract filename from URLs with query parameters
Previously, `prefetchFile()` used `baseNameOf()` directly on the URL string to extract the filename. This caused issues with URLs containing query parameters that include slashes, such as S3 URLs with custom endpoints: ``` s3://bucket/file.txt?endpoint=http://server:9000 ``` The `baseNameOf()` function naively searches for the rightmost `/` in the entire string, which would find the `/` in `http://server:9000` and extract `server:9000&region=...` as the filename. This resulted in invalid store path names containing illegal characters like `:`. This commit fixes the issue by: 1. Adding a `VerbatimURL::lastPathSegment()` method that extracts the last non-empty path segment from a URL, using `pathSegments(true)` to filter empty segments 2. Changing `prefetchFile()` to accept `const VerbatimURL &` and use the new `lastPathSegment()` method instead of manual path parsing 3. Adding early validation with `checkName()` to fail quickly on invalid filenames 4. Maintains backward compatibility by falling back to `baseNameOf()` for unparsable `VerbatimURL`s
1 parent ddf7de0 commit e3b3f05

File tree

3 files changed

+44
-7
lines changed

3 files changed

+44
-7
lines changed

src/libutil/include/nix/util/url.hh

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -408,6 +408,17 @@ struct VerbatimURL
408408
[](const ParsedURL & url) -> std::string_view { return url.scheme; }},
409409
raw);
410410
}
411+
412+
/**
413+
* Get the last non-empty path segment from the URL.
414+
*
415+
* This is useful for extracting filenames from URLs.
416+
* For example, "https://example.com/path/to/file.txt?query=value"
417+
* returns "file.txt".
418+
*
419+
* @return The last non-empty path segment, or std::nullopt if no such segment exists.
420+
*/
421+
std::optional<std::string> lastPathSegment() const;
411422
};
412423

413424
std::ostream & operator<<(std::ostream & os, const VerbatimURL & url);

src/libutil/url.cc

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
#include "nix/util/split.hh"
55
#include "nix/util/canon-path.hh"
66
#include "nix/util/strings-inline.hh"
7+
#include "nix/util/file-system.hh"
78

89
#include <boost/url.hpp>
910

@@ -440,4 +441,21 @@ std::ostream & operator<<(std::ostream & os, const VerbatimURL & url)
440441
return os;
441442
}
442443

444+
std::optional<std::string> VerbatimURL::lastPathSegment() const
445+
{
446+
try {
447+
auto parsedUrl = parsed();
448+
auto segments = parsedUrl.pathSegments(/*skipEmpty=*/true);
449+
if (std::ranges::empty(segments))
450+
return std::nullopt;
451+
return segments.back();
452+
} catch (BadURL &) {
453+
// Fall back to baseNameOf for unparsable URLs
454+
auto name = baseNameOf(to_string());
455+
if (name.empty())
456+
return std::nullopt;
457+
return std::string{name};
458+
}
459+
}
460+
443461
} // namespace nix

src/nix/prefetch.cc

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@
1313
#include "nix/cmd/misc-store-flags.hh"
1414
#include "nix/util/terminal.hh"
1515
#include "nix/util/environment-variables.hh"
16+
#include "nix/util/url.hh"
17+
#include "nix/store/path.hh"
1618

1719
#include "man-pages.hh"
1820

@@ -56,7 +58,7 @@ std::string resolveMirrorUrl(EvalState & state, const std::string & url)
5658

5759
std::tuple<StorePath, Hash> prefetchFile(
5860
ref<Store> store,
59-
std::string_view url,
61+
const VerbatimURL & url,
6062
std::optional<std::string> name,
6163
HashAlgorithm hashAlgo,
6264
std::optional<Hash> expectedHash,
@@ -68,9 +70,15 @@ std::tuple<StorePath, Hash> prefetchFile(
6870

6971
/* Figure out a name in the Nix store. */
7072
if (!name) {
71-
name = baseNameOf(url);
72-
if (name->empty())
73-
throw Error("cannot figure out file name for '%s'", url);
73+
name = url.lastPathSegment();
74+
if (!name || name->empty())
75+
throw Error("cannot figure out file name for '%s'", url.to_string());
76+
}
77+
try {
78+
checkName(*name);
79+
} catch (BadStorePathName & e) {
80+
e.addTrace({}, "file name '%s' was extracted from URL '%s'", *name, url.to_string());
81+
throw;
7482
}
7583

7684
std::optional<StorePath> storePath;
@@ -105,14 +113,14 @@ std::tuple<StorePath, Hash> prefetchFile(
105113

106114
FdSink sink(fd.get());
107115

108-
FileTransferRequest req(VerbatimURL{url});
116+
FileTransferRequest req(url);
109117
req.decompress = false;
110118
getFileTransfer()->download(std::move(req), sink);
111119
}
112120

113121
/* Optionally unpack the file. */
114122
if (unpack) {
115-
Activity act(*logger, lvlChatty, actUnknown, fmt("unpacking '%s'", url));
123+
Activity act(*logger, lvlChatty, actUnknown, fmt("unpacking '%s'", url.to_string()));
116124
auto unpacked = (tmpDir.path() / "unpacked").string();
117125
createDirs(unpacked);
118126
unpackTarfile(tmpFile.string(), unpacked);
@@ -128,7 +136,7 @@ std::tuple<StorePath, Hash> prefetchFile(
128136
}
129137
}
130138

131-
Activity act(*logger, lvlChatty, actUnknown, fmt("adding '%s' to the store", url));
139+
Activity act(*logger, lvlChatty, actUnknown, fmt("adding '%s' to the store", url.to_string()));
132140

133141
auto info = store->addToStoreSlow(
134142
*name, PosixSourceAccessor::createAtRoot(tmpFile), method, hashAlgo, {}, expectedHash);

0 commit comments

Comments
 (0)