Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/libfetchers-tests/git-utils.cc
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ TEST_F(GitUtilsTest, sink_basic)
// sink->createHardlink("foo-1.1/links/foo-2", CanonPath("foo-1.1/hello"));

auto result = repo->dereferenceSingletonDirectory(sink->flush());
auto accessor = repo->getAccessor(result, false, getRepoName());
auto accessor = repo->getAccessor(result, {}, getRepoName());
auto entries = accessor->readDirectory(CanonPath::root);
ASSERT_EQ(entries.size(), 5u);
ASSERT_EQ(accessor->readFile(CanonPath("hello")), "hello world");
Expand Down
13 changes: 9 additions & 4 deletions src/libfetchers/fetchers.cc
Original file line number Diff line number Diff line change
Expand Up @@ -327,7 +327,12 @@ std::pair<ref<SourceAccessor>, Input> Input::getAccessorUnchecked(const Settings
auto makeStoreAccessor = [&]() -> std::pair<ref<SourceAccessor>, Input> {
auto accessor = make_ref<SubstitutedSourceAccessor>(ref{store->getFSAccessor(*storePath)});

accessor->fingerprint = getFingerprint(store);
// FIXME: use the NAR hash for fingerprinting Git trees that have a .gitattributes file, since we don't know if
// we used `git archive` or libgit2 to fetch it.
accessor->fingerprint = getType() == "git" && accessor->pathExists(CanonPath(".gitattributes"))
? std::optional(storePath->hashPart())
: getFingerprint(store);
cachedFingerprint = accessor->fingerprint;

// Store a cache entry for the substituted tree so later fetches
// can reuse the existing nar instead of copying the unpacked
Expand Down Expand Up @@ -357,10 +362,10 @@ std::pair<ref<SourceAccessor>, Input> Input::getAccessorUnchecked(const Settings
try {
auto [accessor, result] = scheme->getAccessor(settings, store, *this);

if (!accessor->fingerprint)
accessor->fingerprint = result.getFingerprint(store);
if (auto fp = accessor->getFingerprint(CanonPath::root).second)
result.cachedFingerprint = *fp;
else
result.cachedFingerprint = accessor->fingerprint;
accessor->fingerprint = result.getFingerprint(store);

return {accessor, std::move(result)};
} catch (Error & e) {
Expand Down
44 changes: 26 additions & 18 deletions src/libfetchers/git-utils.cc
Original file line number Diff line number Diff line change
Expand Up @@ -550,14 +550,15 @@ struct GitRepoImpl : GitRepo, std::enable_shared_from_this<GitRepoImpl>
}

/**
* A 'GitSourceAccessor' with no regard for export-ignore or any other transformations.
* A 'GitSourceAccessor' with no regard for export-ignore.
*/
ref<GitSourceAccessor> getRawAccessor(const Hash & rev, bool smudgeLfs = false);
ref<GitSourceAccessor> getRawAccessor(const Hash & rev, const GitAccessorOptions & options);

ref<SourceAccessor>
getAccessor(const Hash & rev, bool exportIgnore, std::string displayPrefix, bool smudgeLfs = false) override;
getAccessor(const Hash & rev, const GitAccessorOptions & options, std::string displayPrefix) override;

ref<SourceAccessor> getAccessor(const WorkdirInfo & wd, bool exportIgnore, MakeNotAllowedError e) override;
ref<SourceAccessor>
getAccessor(const WorkdirInfo & wd, const GitAccessorOptions & options, MakeNotAllowedError e) override;

ref<GitFileSystemObjectSink> getFileSystemObjectSink() override;

Expand Down Expand Up @@ -700,7 +701,7 @@ struct GitRepoImpl : GitRepo, std::enable_shared_from_this<GitRepoImpl>

Hash treeHashToNarHash(const fetchers::Settings & settings, const Hash & treeHash) override
{
auto accessor = getAccessor(treeHash, false, "");
auto accessor = getAccessor(treeHash, {}, "");

fetchers::Cache::Key cacheKey{"treeHashToNarHash", {{"treeHash", treeHash.gitRev()}}};

Expand Down Expand Up @@ -737,28 +738,35 @@ ref<GitRepo> GitRepo::openRepo(const std::filesystem::path & path, bool create,
return make_ref<GitRepoImpl>(path, create, bare);
}

/**
 * Build the fingerprint string for a Git accessor at revision `rev`.
 *
 * The result is `git:` followed by `rev.gitRev()`, then `;e` when `exportIgnore` is set and `;l` when
 * `smudgeLfs` is set (in that order). Only these two options contribute to the result.
 */
std::string GitAccessorOptions::makeFingerprint(const Hash & rev) const
{
    std::string fingerprint = "git:" + rev.gitRev();
    if (exportIgnore)
        fingerprint += ";e";
    if (smudgeLfs)
        fingerprint += ";l";
    return fingerprint;
}

/**
* Raw git tree input accessor.
*/

struct GitSourceAccessor : SourceAccessor
{
struct State
{
ref<GitRepoImpl> repo;
Object root;
std::optional<lfs::Fetch> lfsFetch = std::nullopt;
GitAccessorOptions options;
};

Sync<State> state_;

GitSourceAccessor(ref<GitRepoImpl> repo_, const Hash & rev, bool smudgeLfs)
GitSourceAccessor(ref<GitRepoImpl> repo_, const Hash & rev, const GitAccessorOptions & options)
: state_{State{
.repo = repo_,
.root = peelToTreeOrBlob(lookupObject(*repo_, hashToOID(rev)).get()),
.lfsFetch = smudgeLfs ? std::make_optional(lfs::Fetch(*repo_, hashToOID(rev))) : std::nullopt,
.lfsFetch = options.smudgeLfs ? std::make_optional(lfs::Fetch(*repo_, hashToOID(rev))) : std::nullopt,
.options = options,
}}
{
fingerprint = options.makeFingerprint(rev);
}

std::string readBlob(const CanonPath & path, bool symlink)
Expand Down Expand Up @@ -1307,26 +1315,26 @@ struct GitFileSystemObjectSinkImpl : GitFileSystemObjectSink
}
};

ref<GitSourceAccessor> GitRepoImpl::getRawAccessor(const Hash & rev, bool smudgeLfs)
ref<GitSourceAccessor> GitRepoImpl::getRawAccessor(const Hash & rev, const GitAccessorOptions & options)
{
auto self = ref<GitRepoImpl>(shared_from_this());
return make_ref<GitSourceAccessor>(self, rev, smudgeLfs);
return make_ref<GitSourceAccessor>(self, rev, options);
}

ref<SourceAccessor>
GitRepoImpl::getAccessor(const Hash & rev, bool exportIgnore, std::string displayPrefix, bool smudgeLfs)
GitRepoImpl::getAccessor(const Hash & rev, const GitAccessorOptions & options, std::string displayPrefix)
{
auto self = ref<GitRepoImpl>(shared_from_this());
ref<GitSourceAccessor> rawGitAccessor = getRawAccessor(rev, smudgeLfs);
ref<GitSourceAccessor> rawGitAccessor = getRawAccessor(rev, options);
rawGitAccessor->setPathDisplay(std::move(displayPrefix));
if (exportIgnore)
if (options.exportIgnore)
return make_ref<GitExportIgnoreSourceAccessor>(self, rawGitAccessor, rev);
else
return rawGitAccessor;
}

ref<SourceAccessor>
GitRepoImpl::getAccessor(const WorkdirInfo & wd, bool exportIgnore, MakeNotAllowedError makeNotAllowedError)
ref<SourceAccessor> GitRepoImpl::getAccessor(
const WorkdirInfo & wd, const GitAccessorOptions & options, MakeNotAllowedError makeNotAllowedError)
{
auto self = ref<GitRepoImpl>(shared_from_this());
ref<SourceAccessor> fileAccessor = AllowListSourceAccessor::create(
Expand All @@ -1336,7 +1344,7 @@ GitRepoImpl::getAccessor(const WorkdirInfo & wd, bool exportIgnore, MakeNotAllow
boost::unordered_flat_set<CanonPath>{CanonPath::root},
std::move(makeNotAllowedError))
.cast<SourceAccessor>();
if (exportIgnore)
if (options.exportIgnore)
fileAccessor = make_ref<GitExportIgnoreSourceAccessor>(self, fileAccessor, std::nullopt);
return fileAccessor;
}
Expand All @@ -1351,7 +1359,7 @@ std::vector<std::tuple<GitRepoImpl::Submodule, Hash>> GitRepoImpl::getSubmodules
/* Read the .gitmodules files from this revision. */
CanonPath modulesFile(".gitmodules");

auto accessor = getAccessor(rev, exportIgnore, "");
auto accessor = getAccessor(rev, {.exportIgnore = exportIgnore}, "");
if (!accessor->pathExists(modulesFile))
return {};

Expand All @@ -1368,7 +1376,7 @@ std::vector<std::tuple<GitRepoImpl::Submodule, Hash>> GitRepoImpl::getSubmodules

std::vector<std::tuple<Submodule, Hash>> result;

auto rawAccessor = getRawAccessor(rev);
auto rawAccessor = getRawAccessor(rev, {});

for (auto & submodule : parseSubmodules(pathTemp)) {
/* Filter out .gitmodules entries that don't exist or are not
Expand Down
136 changes: 122 additions & 14 deletions src/libfetchers/git.cc
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#include "nix/util/json-utils.hh"
#include "nix/util/archive.hh"
#include "nix/util/mounted-source-accessor.hh"
#include "nix/fetchers/fetch-to-store.hh"

#include <regex>
#include <string.h>
Expand Down Expand Up @@ -637,8 +638,72 @@ struct GitInputScheme : InputScheme
return shallow || input.getRevCount().has_value();
}

/**
 * Collect the accessor-related attributes of `input` (`exportIgnore`, `lfs`, `submodules`) into a single
 * `GitAccessorOptions` value.
 */
GitAccessorOptions getGitAccessorOptions(const Input & input) const
{
    // Value-initialise first so that any field not assigned below keeps its default.
    GitAccessorOptions opts{};
    opts.exportIgnore = getExportIgnoreAttr(input);
    opts.smudgeLfs = getLfsAttr(input);
    opts.submodules = getSubmodulesAttr(input);
    return opts;
}

/**
 * Get a `SourceAccessor` for Git revision `rev` using Nix < 2.20 semantics: `git checkout` when
 * `options.submodules` is set, `git archive` otherwise.
 *
 * The tree is exported into a temporary directory and added to `store` via `addToStore("source", …)`; the
 * returned accessor is the one `store.getFSAccessor` yields for the resulting store path. Its fingerprint is
 * `options.makeFingerprint(rev)` with a `;legacy` suffix appended.
 *
 * Note that `options` is an in/out parameter: on the `git archive` path `options.exportIgnore` is set to
 * `true`, and this happens before the fingerprint is computed.
 */
ref<SourceAccessor> getLegacyGitAccessor(
    Store & store,
    RepoInfo & repoInfo,
    const std::filesystem::path & repoDir,
    const Hash & rev,
    GitAccessorOptions & options) const
{
    auto tmpDir = createTempDir();
    AutoDelete delTmpDir(tmpDir, true);

    // Nix < 2.20 used `git checkout` for repos with submodules.
    auto addViaCheckout = [&]() {
        runProgram2({.program = "git", .args = {"init", tmpDir}});
        runProgram2({.program = "git", .args = {"-C", tmpDir, "remote", "add", "origin", repoDir}});
        runProgram2({.program = "git", .args = {"-C", tmpDir, "fetch", "origin", rev.gitRev()}});
        runProgram2({.program = "git", .args = {"-C", tmpDir, "checkout", rev.gitRev()}});
        // The filter returns false for any path whose base name is `.git`.
        PathFilter skipGitDir = [&](const Path & path) { return baseNameOf(path) != ".git"; };
        return store.addToStore(
            "source",
            {getFSSourceAccessor(), CanonPath(tmpDir)},
            ContentAddressMethod::Raw::NixArchive,
            HashAlgorithm::SHA256,
            {},
            skipGitDir);
    };

    // Nix < 2.20 used `git archive` for repos without submodules.
    auto addViaArchive = [&]() {
        options.exportIgnore = true;

        // Stream the output of `git archive` straight into the tarball unpacker.
        auto tarball = sinkToSource([&](Sink & sink) {
            runProgram2(
                {.program = "git",
                 .args = {"-C", repoDir, "--git-dir", repoInfo.gitDir, "archive", rev.gitRev()},
                 .standardOut = &sink});
        });

        unpackTarfile(*tarball, tmpDir);

        return store.addToStore("source", {getFSSourceAccessor(), CanonPath(tmpDir)});
    };

    auto storePath = options.submodules ? addViaCheckout() : addViaArchive();

    auto accessor = store.getFSAccessor(storePath);
    accessor->fingerprint = options.makeFingerprint(rev) + ";legacy";
    return ref{accessor};
}

std::pair<ref<SourceAccessor>, Input>
getAccessorFromCommit(const Settings & settings, ref<Store> store, RepoInfo & repoInfo, Input && input) const

{
assert(!repoInfo.workdirInfo.isDirty);

Expand Down Expand Up @@ -779,17 +844,59 @@ struct GitInputScheme : InputScheme

verifyCommit(input, repo);

bool exportIgnore = getExportIgnoreAttr(input);
bool smudgeLfs = getLfsAttr(input);
auto accessor = repo->getAccessor(rev, exportIgnore, "«" + input.to_string(true) + "»", smudgeLfs);
auto options = getGitAccessorOptions(input);

auto expectedNarHash = input.getNarHash();

auto accessor = repo->getAccessor(rev, options, "«" + input.to_string(true) + "»");

if (settings.nix219Compat && !options.smudgeLfs && accessor->pathExists(CanonPath(".gitattributes"))) {
/* Use Nix 2.19 semantics to generate locks, but if a NAR hash is specified, support Nix >= 2.20 semantics
* as well. */
warn("Using Nix 2.19 semantics to export Git repository '%s'.", input.to_string());
auto accessorModern = accessor;
accessor = getLegacyGitAccessor(*store, repoInfo, repoDir, rev, options);
if (expectedNarHash) {
auto narHashLegacy =
fetchToStore2(settings, *store, {accessor}, FetchMode::DryRun, input.getName()).second;
if (expectedNarHash != narHashLegacy) {
auto narHashModern =
fetchToStore2(settings, *store, {accessorModern}, FetchMode::DryRun, input.getName()).second;
if (expectedNarHash == narHashModern)
accessor = accessorModern;
}
}
} else {
/* Backward compatibility hack for locks produced by Nix < 2.20 that depend on Nix applying Git filters,
* `export-ignore` or `export-subst`. Nix >= 2.20 doesn't do those, so we may get a NAR hash mismatch. If
* that happens, try again using `git archive`. */
auto narHashNew = fetchToStore2(settings, *store, {accessor}, FetchMode::DryRun, input.getName()).second;
if (expectedNarHash && accessor->pathExists(CanonPath(".gitattributes"))) {
if (expectedNarHash != narHashNew) {
auto accessorLegacy = getLegacyGitAccessor(*store, repoInfo, repoDir, rev, options);
auto narHashLegacy =
fetchToStore2(settings, *store, {accessorLegacy}, FetchMode::DryRun, input.getName()).second;
if (expectedNarHash == narHashLegacy) {
warn(
"Git input '%s' specifies a NAR hash '%s' that was created by Nix < 2.20.\n"
"Nix >= 2.20 does not apply Git filters, `export-ignore` and `export-subst` by default, which changes the NAR hash.\n"
"Please update the NAR hash to '%s'.",
input.to_string(),
expectedNarHash->to_string(HashFormat::SRI, true),
narHashNew.to_string(HashFormat::SRI, true));
accessor = accessorLegacy;
}
}
}
}

/* If the repo has submodules, fetch them and return a mounted
input accessor consisting of the accessor for the top-level
repo and the accessors for the submodules. */
if (getSubmodulesAttr(input)) {
if (options.submodules) {
std::map<CanonPath, nix::ref<SourceAccessor>> mounts;

for (auto & [submodule, submoduleRev] : repo->getSubmodules(rev, exportIgnore)) {
for (auto & [submodule, submoduleRev] : repo->getSubmodules(rev, options.exportIgnore)) {
auto resolved = repo->resolveSubmoduleUrl(submodule.url);
debug(
"Git submodule %s: %s %s %s -> %s",
Expand All @@ -812,9 +919,9 @@ struct GitInputScheme : InputScheme
}
}
attrs.insert_or_assign("rev", submoduleRev.gitRev());
attrs.insert_or_assign("exportIgnore", Explicit<bool>{exportIgnore});
attrs.insert_or_assign("exportIgnore", Explicit<bool>{options.exportIgnore});
attrs.insert_or_assign("submodules", Explicit<bool>{true});
attrs.insert_or_assign("lfs", Explicit<bool>{smudgeLfs});
attrs.insert_or_assign("lfs", Explicit<bool>{options.smudgeLfs});
attrs.insert_or_assign("allRefs", Explicit<bool>{true});
auto submoduleInput = fetchers::Input::fromAttrs(settings, std::move(attrs));
auto [submoduleAccessor, submoduleInput2] = submoduleInput.getAccessor(settings, store);
Expand All @@ -823,8 +930,10 @@ struct GitInputScheme : InputScheme
}

if (!mounts.empty()) {
auto newFingerprint = accessor->getFingerprint(CanonPath::root).second->append(";s");
mounts.insert_or_assign(CanonPath::root, accessor);
accessor = makeMountedSourceAccessor(std::move(mounts));
accessor->fingerprint = newFingerprint;
}
}

Expand All @@ -848,7 +957,7 @@ struct GitInputScheme : InputScheme
auto exportIgnore = getExportIgnoreAttr(input);

ref<SourceAccessor> accessor =
repo->getAccessor(repoInfo.workdirInfo, exportIgnore, makeNotAllowedError(repoPath));
repo->getAccessor(repoInfo.workdirInfo, {.exportIgnore = exportIgnore}, makeNotAllowedError(repoPath));

/* If the repo has submodules, return a mounted input accessor
consisting of the accessor for the top-level repo and the
Expand Down Expand Up @@ -942,13 +1051,12 @@ struct GitInputScheme : InputScheme

std::optional<std::string> getFingerprint(ref<Store> store, const Input & input) const override
{
auto makeFingerprint = [&](const Hash & rev) {
return rev.gitRev() + (getSubmodulesAttr(input) ? ";s" : "") + (getExportIgnoreAttr(input) ? ";e" : "")
+ (getLfsAttr(input) ? ";l" : "");
};
auto options = getGitAccessorOptions(input);

if (auto rev = input.getRev())
return makeFingerprint(*rev);
// FIXME: this can return a wrong fingerprint for the legacy (`git archive`) case, since we don't know here
// whether to append the `;legacy` suffix or not.
return options.makeFingerprint(*rev);
else {
auto repoInfo = getRepoInfo(input);
if (auto repoPath = repoInfo.getPath(); repoPath && repoInfo.workdirInfo.submodules.empty()) {
Expand All @@ -964,7 +1072,7 @@ struct GitInputScheme : InputScheme
writeString("deleted:", hashSink);
writeString(file.abs(), hashSink);
}
return makeFingerprint(repoInfo.workdirInfo.headRev.value_or(nullRev))
return options.makeFingerprint(repoInfo.workdirInfo.headRev.value_or(nullRev))
+ ";d=" + hashSink.finish().hash.to_string(HashFormat::Base16, false);
}
return std::nullopt;
Expand Down
4 changes: 2 additions & 2 deletions src/libfetchers/github.cc
Original file line number Diff line number Diff line change
Expand Up @@ -326,7 +326,7 @@ struct GitArchiveInputScheme : InputScheme
input.attrs.insert_or_assign("lastModified", uint64_t(tarballInfo.lastModified));

auto accessor =
settings.getTarballCache()->getAccessor(tarballInfo.treeHash, false, "«" + input.to_string(true) + "»");
settings.getTarballCache()->getAccessor(tarballInfo.treeHash, {}, "«" + input.to_string(true) + "»");

if (!settings.trustTarballsFromGitForges)
// FIXME: computing the NAR hash here is wasteful if
Expand All @@ -350,7 +350,7 @@ struct GitArchiveInputScheme : InputScheme
std::optional<std::string> getFingerprint(ref<Store> store, const Input & input) const override
{
if (auto rev = input.getRev())
return rev->gitRev();
return "github:" + rev->gitRev();
else
return std::nullopt;
}
Comment on lines 350 to 356
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Fingerprint prefix "github:" is used for all GitArchiveInputScheme subclasses.

GitArchiveInputScheme::getFingerprint is inherited by GitHubInputScheme, GitLabInputScheme, and SourceHutInputScheme. Using "github:" as the prefix for GitLab and SourceHut inputs is misleading and could cause fingerprint collisions if different forges return the same commit hash.

Consider using schemeName() to generate a scheme-specific prefix:

     std::optional<std::string> getFingerprint(ref<Store> store, const Input & input) const override
     {
         if (auto rev = input.getRev())
-            return "github:" + rev->gitRev();
+            return std::string{schemeName()} + ":" + rev->gitRev();
         else
             return std::nullopt;
     }
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
std::optional<std::string> getFingerprint(ref<Store> store, const Input & input) const override
{
if (auto rev = input.getRev())
return rev->gitRev();
return "github:" + rev->gitRev();
else
return std::nullopt;
}
std::optional<std::string> getFingerprint(ref<Store> store, const Input & input) const override
{
if (auto rev = input.getRev())
return std::string{schemeName()} + ":" + rev->gitRev();
else
return std::nullopt;
}
🤖 Prompt for AI Agents
In src/libfetchers/github.cc around lines 350 to 356, the fingerprint prefix is
hardcoded as "github:" which is used by all GitArchiveInputScheme subclasses;
change the code to build the prefix from the scheme name (e.g. use schemeName()
or the instance's scheme getter) so the returned fingerprint is schemeName() +
":" + rev->gitRev() when rev is present, otherwise return std::nullopt, ensuring
unique, scheme-specific fingerprints and avoiding collisions across
GitHub/GitLab/SourceHut.

Expand Down
Loading