Skip to content

Commit 2b7122b

Browse files
committed
perf: Eliminate per-directory stat syscalls in untracked file detection
Pre-build a Gitignore matcher from tracked .gitignore files and disable the ignore crate's per-directory probing for .git, .gitignore, .ignore, and .git/info/exclude. Apply rules via filter_entry for subtree pruning. Discover untracked .gitignore files in a post-walk filter pass. Add 8 regression tests covering untracked detection edge cases: new nested directories, root-level files, gitignore negation, gitignore in untracked directories, empty directories, scale, and a comprehensive equivalence test against the subprocess path.
1 parent 03b79e0 commit 2b7122b

File tree

2 files changed

+367
-9
lines changed

2 files changed

+367
-9
lines changed

crates/turborepo-scm/src/git_index_regression_tests.rs

Lines changed: 287 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -820,3 +820,290 @@ fn test_gix_index_sorted_order_preserved_through_pipeline() {
820820
let zzz = repo.get_hashes("zzz-pkg");
821821
assert_eq!(zzz.len(), 1);
822822
}
823+
824+
// ═══════════════════════════════════════════════════════════════════════════
825+
// Category 4: Untracked File Detection Regression Tests
826+
//
827+
// These tests ensure that untracked file detection produces correct results
828+
// regardless of the underlying walk algorithm. They cover edge cases around
829+
// directory discovery, gitignore handling, and the interaction between the
830+
// git index and the filesystem.
831+
// ═══════════════════════════════════════════════════════════════════════════
832+
833+
/// Untracked files that live in directories absent from the git index must be
/// reported, and the index-backed result must equal the no-index result.
#[test]
fn test_untracked_in_new_nested_directories() {
    let repo = TestRepo::new();

    // Committed baseline: two files inside the package plus a root manifest.
    for (file, contents) in [
        ("my-pkg/src/index.ts", "code"),
        ("my-pkg/package.json", "{}"),
        ("package.json", "{}"),
    ] {
        repo.create_file(file, contents);
    }
    repo.commit_all();

    // Everything below is created after the commit, in directories that were
    // never committed.
    for (file, contents) in [
        ("my-pkg/new-dir/untracked.ts", "new"),
        ("my-pkg/new-dir/sub/deep-untracked.ts", "deep new"),
        ("my-pkg/another-new/file.ts", "another"),
    ] {
        repo.create_file(file, contents);
    }

    let with_index = repo.get_hashes("my-pkg");
    for expected in [
        "src/index.ts",
        "new-dir/untracked.ts",
        "new-dir/sub/deep-untracked.ts",
        "another-new/file.ts",
    ] {
        assert!(with_index.contains_key(&path(expected)));
    }
    // Two committed package files + three untracked files.
    assert_eq!(with_index.len(), 5);

    // Equivalence: the walk that does not use the index must agree exactly.
    let without_index = repo.get_hashes_no_index("my-pkg");
    assert_eq!(with_index, without_index);
}
859+
860+
/// Untracked files at the repository root must appear in a root-scoped query
/// and must not leak into a package-scoped query.
#[test]
fn test_untracked_at_repo_root() {
    let repo = TestRepo::new();

    repo.create_file("my-pkg/file.ts", "pkg");
    repo.create_file("package.json", "{}");
    repo.commit_all();

    // Created after the commit, directly at the repo root.
    for (file, contents) in [
        ("root-untracked.txt", "at root"),
        ("another-root-file.js", "also root"),
    ] {
        repo.create_file(file, contents);
    }

    // Root query: both untracked root files and the committed package file.
    let at_root = repo.get_hashes("");
    for expected in [
        "root-untracked.txt",
        "another-root-file.js",
        "my-pkg/file.ts",
    ] {
        assert!(at_root.contains_key(&path(expected)));
    }

    // Package query: only the single committed package file is expected.
    let in_pkg = repo.get_hashes("my-pkg");
    assert!(!in_pkg.contains_key(&path("root-untracked.txt")));
    assert_eq!(in_pkg.len(), 1);
}
882+
883+
/// A `!pattern` negation in the root `.gitignore` must re-include a file that
/// an earlier pattern in the same file would otherwise exclude.
#[test]
fn test_gitignore_negation_patterns() {
    let repo = TestRepo::new();

    repo.create_gitignore(".gitignore", "*.log\n!important.log\n");
    for (file, contents) in [
        ("my-pkg/src/index.ts", "code"),
        ("my-pkg/package.json", "{}"),
    ] {
        repo.create_file(file, contents);
    }
    repo.commit_all();

    // Three untracked `.log` files; only `important.log` is negated.
    for (file, contents) in [
        ("my-pkg/debug.log", "debug output"),
        ("my-pkg/important.log", "keep me"),
        ("my-pkg/error.log", "error output"),
    ] {
        repo.create_file(file, contents);
    }

    let with_index = repo.get_hashes("my-pkg");
    for ignored in ["debug.log", "error.log"] {
        assert!(
            !with_index.contains_key(&path(ignored)),
            "{} should be gitignored",
            ignored
        );
    }
    assert!(
        with_index.contains_key(&path("important.log")),
        "important.log should NOT be gitignored (negation pattern)"
    );

    // The no-index walk must produce the same map.
    let without_index = repo.get_hashes_no_index("my-pkg");
    assert_eq!(with_index, without_index);
}
913+
914+
/// A `.gitignore` that is itself untracked, inside a directory that was never
/// committed, must still be honored for that directory and its subdirectories.
#[test]
fn test_gitignore_in_untracked_directory() {
    let repo = TestRepo::new();

    repo.create_file("my-pkg/src/index.ts", "code");
    repo.create_file("my-pkg/package.json", "{}");
    repo.commit_all();

    // `new-dir` is created after the commit and carries its own `.gitignore`.
    for (file, contents) in [
        ("my-pkg/new-dir/.gitignore", "*.tmp\n"),
        ("my-pkg/new-dir/keep.ts", "keep"),
        ("my-pkg/new-dir/skip.tmp", "should be ignored"),
        ("my-pkg/new-dir/sub/also-keep.ts", "also keep"),
        ("my-pkg/new-dir/sub/also-skip.tmp", "also ignored"),
    ] {
        repo.create_file(file, contents);
    }

    let with_index = repo.get_hashes("my-pkg");
    for expected in [
        "new-dir/keep.ts",
        "new-dir/sub/also-keep.ts",
        "new-dir/.gitignore",
    ] {
        assert!(with_index.contains_key(&path(expected)));
    }
    assert!(
        !with_index.contains_key(&path("new-dir/skip.tmp")),
        ".gitignore in untracked dir should be respected"
    );
    assert!(
        !with_index.contains_key(&path("new-dir/sub/also-skip.tmp")),
        ".gitignore in untracked dir should apply to subdirs"
    );

    // The no-index walk must produce the same map.
    let without_index = repo.get_hashes_no_index("my-pkg");
    assert_eq!(with_index, without_index);
}
946+
947+
/// Empty directories on disk must neither cause errors nor contribute entries
/// to the resulting hash map.
#[test]
fn test_empty_directories_on_disk() {
    let repo = TestRepo::new();

    repo.create_file("my-pkg/file.ts", "content");
    repo.create_file("my-pkg/package.json", "{}");
    repo.commit_all();

    // Directories with no files in them: one inside the package, one nested
    // inside that, and one at the repo root.
    for dir in [
        "my-pkg/empty-dir",
        "my-pkg/empty-dir/nested-empty",
        "other-empty",
    ] {
        repo.root
            .join_unix_path(path(dir))
            .create_dir_all()
            .unwrap();
    }

    let in_pkg = repo.get_hashes("my-pkg");
    assert_eq!(in_pkg.len(), 2, "empty dirs should not add files");
    for expected in ["file.ts", "package.json"] {
        assert!(in_pkg.contains_key(&path(expected)));
    }

    // A root-scoped query must cope with the empty directories as well.
    let at_root = repo.get_hashes("");
    assert!(at_root.contains_key(&path("my-pkg/file.ts")));
}
975+
976+
/// Scale check: 50 untracked files spread over 10 new directories must all be
/// reported, and the index-backed result must equal the no-index result.
#[test]
fn test_many_untracked_files_across_many_new_directories() {
    let repo = TestRepo::new();

    for (file, contents) in [
        ("base-pkg/committed.ts", "committed"),
        ("base-pkg/package.json", "{}"),
        ("package.json", "{}"),
    ] {
        repo.create_file(file, contents);
    }
    repo.commit_all();

    // 10 new directories x 5 files each, created in (dir, file) order.
    (0..10)
        .flat_map(|dir_idx| (0..5).map(move |file_idx| (dir_idx, file_idx)))
        .for_each(|(dir_idx, file_idx)| {
            repo.create_file(
                &format!("base-pkg/new-dir-{}/file-{}.ts", dir_idx, file_idx),
                &format!("content {} {}", dir_idx, file_idx),
            );
        });

    let with_index = repo.get_hashes("base-pkg");
    // 2 committed + 50 untracked = 52
    assert_eq!(with_index.len(), 52);
    for expected in [
        "committed.ts",
        "new-dir-0/file-0.ts",
        "new-dir-9/file-4.ts",
    ] {
        assert!(with_index.contains_key(&path(expected)));
    }

    // The no-index walk must produce the same map.
    let without_index = repo.get_hashes_no_index("base-pkg");
    assert_eq!(with_index, without_index);
}
1005+
1006+
/// Comprehensive equivalence test: builds a repo with several packages, root
/// and nested `.gitignore` files, and a dirty working tree, then checks that
/// `get_hashes` and `get_hashes_no_index` agree for every package and that
/// selected paths are present or absent as expected.
#[test]
fn test_untracked_detection_equivalence_comprehensive() {
    let repo = TestRepo::new();

    // Root-level ignore rules.
    repo.create_gitignore(".gitignore", "*.log\ndist/\n.cache/\n");

    // Several packages at different depths.
    for (file, contents) in [
        ("apps/web/src/index.ts", "web code"),
        ("apps/web/src/utils.ts", "utils"),
        ("apps/web/package.json", "{}"),
        ("apps/docs/README.md", "docs"),
        ("apps/docs/package.json", "{}"),
        ("packages/ui/src/button.tsx", "button"),
        ("packages/ui/package.json", "{}"),
        ("packages/shared/lib/helpers.ts", "helpers"),
        ("packages/shared/package.json", "{}"),
    ] {
        repo.create_file(file, contents);
    }

    // Package-local ignore rules.
    repo.create_gitignore("packages/ui/.gitignore", "storybook-static/\n");

    repo.create_file("package.json", "{}");
    repo.commit_all();

    // Dirty the working tree after the commit.
    // Modified tracked file:
    repo.create_file("apps/web/src/index.ts", "modified web code");
    // Deleted tracked file:
    repo.delete_file("apps/web/src/utils.ts");
    for (file, contents) in [
        // Untracked files in directories that already hold committed files.
        ("apps/web/src/new-component.tsx", "new component"),
        ("packages/ui/src/dialog.tsx", "dialog"),
        // Untracked files in directories created after the commit.
        ("apps/web/tests/app.test.ts", "test"),
        ("packages/shared/lib/internal/deep.ts", "deep file"),
        // Files matching the ignore rules written above.
        ("apps/web/debug.log", "log output"),
        ("apps/web/dist/bundle.js", "compiled"),
        ("packages/ui/storybook-static/index.html", "storybook"),
        ("apps/web/.cache/data.json", "cache"),
        // Untracked file at the repo root.
        ("turbo.json", "{}"),
    ] {
        repo.create_file(file, contents);
    }

    // Every package (and the root, "") must hash identically with and
    // without the index.
    for pkg in [
        "apps/web",
        "apps/docs",
        "packages/ui",
        "packages/shared",
        "",
    ] {
        let indexed = repo.get_hashes(pkg);
        let unindexed = repo.get_hashes_no_index(pkg);
        assert_eq!(
            indexed, unindexed,
            "index vs no-index mismatch for package {:?}",
            pkg,
        );
    }

    // Spot-check specific expectations: (path, expected presence, message).
    let web = repo.get_hashes("apps/web");
    for (file, should_exist, why) in [
        ("src/index.ts", true, "modified file should be present"),
        ("src/utils.ts", false, "deleted file should be absent"),
        ("src/new-component.tsx", true, "untracked in existing dir"),
        ("tests/app.test.ts", true, "untracked in new dir"),
        ("debug.log", false, "gitignored by root .gitignore"),
        ("dist/bundle.js", false, "gitignored directory"),
        (".cache/data.json", false, "gitignored directory"),
    ] {
        assert!(web.contains_key(&path(file)) == should_exist, "{}", why);
    }

    let ui = repo.get_hashes("packages/ui");
    for (file, should_exist, why) in [
        ("src/dialog.tsx", true, "untracked in existing dir"),
        (
            "storybook-static/index.html",
            false,
            "gitignored by nested .gitignore",
        ),
    ] {
        assert!(ui.contains_key(&path(file)) == should_exist, "{}", why);
    }
}

0 commit comments

Comments
 (0)