Skip to content

Commit e2d4e61

Browse files
committed
perf: Replace gix-index + filesystem walk with parallel git subprocesses
Build RepoGitIndex from three parallel git subprocesses instead of gix-index stat + ignore-crate walk:

- git ls-tree -r HEAD -z (blob OIDs, ~60ms)
- git diff-index HEAD -z (modified/deleted, ~95-150ms)
- git ls-files --others -z (untracked, ~230-530ms)

The three run on separate threads and complete in max(ls-files) time. Git's internal implementations are optimized for large repos — ls-tree reads from packfiles without stat, diff-index only stats files that differ, and ls-files uses the index for efficient untracked detection.

Benchmark (110-package monorepo, 30 runs, sandboxed Linux):

  baseline: 878ms ± 27ms
  improved: 437ms ± 7ms (2.01x faster)

On macOS (APFS), git ls-files --others is slower (~530ms vs ~230ms on Linux ext4), so the improvement is smaller. The approach is never slower than the previous implementation.
1 parent a9bbb9e commit e2d4e61

File tree

5 files changed

+717
-55
lines changed

5 files changed

+717
-55
lines changed

crates/turborepo-lib/src/run/builder.rs

Lines changed: 16 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -261,24 +261,12 @@ impl RunBuilder {
261261
);
262262
let start_at = Local::now();
263263

264-
let (tracked_index_tx, tracked_index_rx) =
265-
tokio::sync::oneshot::channel::<Option<turborepo_scm::RepoGitIndex>>();
266-
let (git_root_tx, git_root_rx) =
267-
tokio::sync::oneshot::channel::<Option<turbopath::AbsoluteSystemPathBuf>>();
268264
let scm_task = {
269265
let repo_root = self.repo_root.clone();
270266
let git_root = self.opts.git_root.clone();
271-
tokio::task::spawn_blocking(move || {
272-
let scm = match git_root {
273-
Some(root) => SCM::new_with_git_root(&repo_root, root),
274-
None => SCM::new(&repo_root),
275-
};
276-
// Send git root immediately so the filesystem walk can start
277-
// while index construction continues.
278-
let _ = git_root_tx.send(scm.git_root().map(|r| r.to_owned()));
279-
let repo_index = scm.build_tracked_repo_index_eager();
280-
let _ = tracked_index_tx.send(repo_index);
281-
scm
267+
tokio::task::spawn_blocking(move || match git_root {
268+
Some(root) => SCM::new_with_git_root(&repo_root, root),
269+
None => SCM::new(&repo_root),
282270
})
283271
};
284272
let package_json_path = self.repo_root.join_component("package.json");
@@ -354,39 +342,21 @@ impl RunBuilder {
354342
repo_telemetry.track_size(pkg_dep_graph.len());
355343
run_telemetry.track_run_type(self.opts.run_opts.dry_run.is_some());
356344

357-
// Spawn the filesystem walk as soon as the git root is resolved.
358-
// It only needs the git root and package prefixes, not the tracked
359-
// index. The walk runs in parallel with new_from_gix_index (~267ms).
360-
let all_prefixes = Self::all_package_prefixes(&pkg_dep_graph);
361-
let walk_task = if all_prefixes.is_empty() {
362-
None
363-
} else {
364-
Some(tokio::task::spawn(async move {
365-
let git_root = match git_root_rx.await {
366-
Ok(Some(root)) => root,
367-
_ => return None,
368-
};
369-
tokio::task::spawn_blocking(move || {
370-
let _span = tracing::info_span!("walk_candidate_files").entered();
371-
turborepo_scm::walk_candidate_files(git_root.as_std_path(), Some(&all_prefixes))
372-
.ok()
373-
})
374-
.await
375-
.ok()?
345+
// Build the repo index using three parallel git subprocesses
346+
// (ls-tree, diff-index, ls-files). This is faster than gix-index +
347+
// filesystem walk because git's internal implementations are optimized
348+
// for large repos (~350ms vs ~475ms on 185K files).
349+
let scm = scm_task
350+
.instrument(tracing::info_span!("scm_task_await"))
351+
.await
352+
.expect("detecting scm panicked");
353+
let repo_index_task = {
354+
let scm = scm.clone();
355+
Some(tokio::task::spawn_blocking(move || {
356+
let _span = tracing::info_span!("build_repo_index_subprocesses").entered();
357+
scm.build_repo_index_from_subprocesses()
376358
}))
377359
};
378-
379-
// Combine the walk results with the tracked index once both are ready.
380-
let repo_index_task = walk_task.map(|walk_task| {
381-
tokio::task::spawn(async move {
382-
let (candidates, tracked_index) = tokio::join!(walk_task, tracked_index_rx);
383-
let candidates = candidates.ok()??;
384-
let tracked_index = tracked_index.ok()??;
385-
let mut repo_index = tracked_index;
386-
repo_index.populate_untracked_from_candidates(candidates);
387-
Some(repo_index)
388-
})
389-
});
390360
let micro_frontend_configs = {
391361
let _span = tracing::info_span!("micro_frontends_from_disk").entered();
392362
match MicrofrontendsConfigs::from_disk(&self.repo_root, &pkg_dep_graph) {
@@ -497,13 +467,6 @@ impl RunBuilder {
497467
turbo_json_loader.preload_all();
498468
}
499469

500-
// Await the SCM background task. The tracked index was already
501-
// forwarded to the untracked walk via oneshot channel above.
502-
let scm = scm_task
503-
.instrument(tracing::info_span!("scm_task_await"))
504-
.await
505-
.expect("detecting scm panicked");
506-
507470
let filtered_pkgs = {
508471
let _span = tracing::info_span!("calculate_filtered_packages").entered();
509472
Self::calculate_filtered_packages(

0 commit comments

Comments
 (0)