Skip to content

Commit d40f661

Browse files
authored
Merge pull request #14462 from NixOS/parallel-revcount
GitRepo::getRevCount(): Compute revcount in parallel
2 parents 341c42f + 9657fea commit d40f661

File tree

1 file changed

+45
-12
lines changed

1 file changed

+45
-12
lines changed

src/libfetchers/git-utils.cc

Lines changed: 45 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@
1010
#include "nix/util/fs-sink.hh"
1111
#include "nix/util/sync.hh"
1212
#include "nix/util/util.hh"
13+
#include "nix/util/thread-pool.hh"
14+
#include "nix/util/pool.hh"
1315

1416
#include <git2/attr.h>
1517
#include <git2/blob.h>
@@ -33,12 +35,14 @@
3335
#include <git2/tag.h>
3436
#include <git2/tree.h>
3537

38+
#include <boost/unordered/concurrent_flat_set.hpp>
3639
#include <boost/unordered/unordered_flat_map.hpp>
3740
#include <boost/unordered/unordered_flat_set.hpp>
3841
#include <iostream>
3942
#include <queue>
4043
#include <regex>
4144
#include <span>
45+
#include <ranges>
4246

4347
namespace std {
4448

@@ -227,12 +231,16 @@ struct GitRepoImpl : GitRepo, std::enable_shared_from_this<GitRepoImpl>
227231
{
228232
/** Location of the repository on disk. */
229233
std::filesystem::path path;
234+
235+
bool bare;
236+
230237
/**
231238
* libgit2 repository. Note that new objects are not written to disk,
232239
* because we are using a mempack backend. For writing to disk, see
233240
* `flush()`, which is also called by `GitFileSystemObjectSink::sync()`.
234241
*/
235242
Repository repo;
243+
236244
/**
237245
* In-memory object store for efficient batched writing to packfiles.
238246
* Owned by `repo`.
@@ -241,6 +249,7 @@ struct GitRepoImpl : GitRepo, std::enable_shared_from_this<GitRepoImpl>
241249

242250
GitRepoImpl(std::filesystem::path _path, bool create, bool bare)
243251
: path(std::move(_path))
252+
, bare(bare)
244253
{
245254
initLibGit2();
246255

@@ -317,32 +326,56 @@ struct GitRepoImpl : GitRepo, std::enable_shared_from_this<GitRepoImpl>
317326
checkInterrupt();
318327
}
319328

329+
/**
330+
* Return a connection pool for this repo. Useful for
331+
* multithreaded access.
*
* Every instance the pool creates is a new `GitRepoImpl` constructed
* from this object's `path` and `bare`, with `create` set to `false`.
* The factory lambda captures `this`, so it must not be invoked after
* this object has been destroyed.
332+
*/
333+
Pool<GitRepoImpl> getPool()
334+
{
335+
// TODO: as an optimization, it would be nice to include `this` in the pool.
// NOTE(review): the first argument is presumably the pool's maximum size,
// i.e. effectively unbounded here — confirm against `Pool`.
336+
return Pool<GitRepoImpl>(std::numeric_limits<size_t>::max(), [this]() -> ref<GitRepoImpl> {
337+
return make_ref<GitRepoImpl>(path, false, bare);
338+
});
339+
}
340+
320341
/**
* Count the commits reachable from `rev`: the commit that `rev` peels
* to, plus every commit found by following parent ids. The result is
* the size of the `done` set of visited commit ids.
*
* In this diff, the added (`+`) lines walk the history as work items on
* a `ThreadPool`, each taking a `GitRepoImpl` from `getPool()`; the
* removed (`-`) lines are the earlier single-threaded, queue-based walk.
*
* Throws `Error` when a parent id cannot be obtained for a commit.
*/
uint64_t getRevCount(const Hash & rev) override
321342
{
322-
boost::unordered_flat_set<git_oid, std::hash<git_oid>> done;
323-
std::queue<Commit> todo;
343+
boost::concurrent_flat_set<git_oid, std::hash<git_oid>> done;
324344

325-
todo.push(peelObject<Commit>(lookupObject(*this, hashToOID(rev)).get(), GIT_OBJECT_COMMIT));
// Resolve `rev` to a commit and record its id as visited before any
// work item is scheduled.
345+
auto startCommit = peelObject<Commit>(lookupObject(*this, hashToOID(rev)).get(), GIT_OBJECT_COMMIT);
346+
auto startOid = *git_commit_id(startCommit.get());
347+
done.insert(startOid);
326348

327-
while (auto commit = pop(todo)) {
328-
if (!done.insert(*git_commit_id(commit->get())).second)
329-
continue;
// Work items take their repository instance from this pool instead of
// using `*this`.
349+
auto repoPool(getPool());
350+
351+
ThreadPool pool;
352+
// Work item: look up commit `oid`, then schedule every parent whose id
// has not been seen yet. The explicit object parameter
// (`this const auto & process`) lets the lambda refer to itself.
353+
auto process = [&done, &pool, &repoPool](this const auto & process, const git_oid & oid) -> void {
354+
auto repo(repoPool.get());
355+
356+
auto _commit = lookupObject(*repo, oid, GIT_OBJECT_COMMIT);
// NOTE(review): assumes the object returned by
// `lookupObject(..., GIT_OBJECT_COMMIT)` can be viewed as a
// `git_commit` — confirm.
357+
auto commit = (const git_commit *) &*_commit;
330358

331-
for (size_t n = 0; n < git_commit_parentcount(commit->get()); ++n) {
332-
git_commit * parent;
333-
if (git_commit_parent(&parent, commit->get(), n)) {
359+
for (auto n : std::views::iota(0U, git_commit_parentcount(commit))) {
360+
auto parentOid = git_commit_parent_id(commit, n);
361+
if (!parentOid) {
// NOTE(review): the message below contains "add set", which looks like
// a typo; it is left unchanged here because it is runtime text.
334362
throw Error(
335363
"Failed to retrieve the parent of Git commit '%s': %s. "
336364
"This may be due to an incomplete repository history. "
337365
"To resolve this, either enable the shallow parameter in your flake URL (?shallow=1) "
338366
"or add set the shallow parameter to true in builtins.fetchGit, "
339367
"or fetch the complete history for this branch.",
340-
*git_commit_id(commit->get()),
368+
*git_commit_id(commit),
341369
git_error_last()->message);
342370
}
343-
todo.push(Commit(parent));
// Schedule a parent only when its id was newly inserted into `done`.
// `std::bind` stores a copy of the oid, so the queued work item does
// not refer to storage owned by `_commit`.
371+
if (done.insert(*parentOid))
372+
pool.enqueue(std::bind(process, *parentOid));
344373
}
345-
}
374+
};
375+
376+
pool.enqueue(std::bind(process, startOid));
377+
// Presumably runs the queued work items and returns once all of them
// have finished — confirm against `ThreadPool`.
378+
pool.process();
346379

347380
return done.size();
348381
}

0 commit comments

Comments
 (0)