Skip to content

Commit 2accee5

Browse files
committed
create_wd_tree() uses gix-status
It's the same idea as before, but it's faster as `gix` uses more threads.
1 parent 8e9c088 commit 2accee5

File tree

2 files changed

+204
-83
lines changed

2 files changed

+204
-83
lines changed

crates/gitbutler-repo/src/repository_ext.rs

Lines changed: 168 additions & 78 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ use crate::Config;
22
use crate::SignaturePurpose;
33
use anyhow::{anyhow, bail, Context, Result};
44
use bstr::BString;
5-
use git2::{StatusOptions, Tree};
5+
use git2::Tree;
66
use gitbutler_commit::commit_headers::CommitHeadersV2;
77
use gitbutler_config::git::{GbConfig, GitConfig};
88
use gitbutler_error::error::Code;
@@ -11,14 +11,13 @@ use gitbutler_oxidize::{
1111
};
1212
use gitbutler_reference::{Refname, RemoteRefname};
1313
use gix::filter::plumbing::pipeline::convert::ToGitOutcome;
14-
use gix::fs::is_executable;
1514
use gix::objs::WriteTo;
16-
use std::io;
15+
use std::collections::HashSet;
1716
#[cfg(unix)]
1817
use std::os::unix::fs::PermissionsExt;
1918
#[cfg(windows)]
2019
use std::os::windows::process::CommandExt;
21-
use std::{io::Write, path::Path, process::Stdio, str};
20+
use std::{io::Write, process::Stdio, str};
2221
use tracing::instrument;
2322

2423
/// Extension trait for `git2::Repository`.
@@ -112,10 +111,16 @@ impl RepositoryExt for git2::Repository {
112111
/// Note that right now, it doesn't skip big files.
113112
///
114113
/// It should also be noted that this will fail if run on an empty branch
115-
/// or if the HEAD branch has no commits
114+
/// or if the HEAD branch has no commits.
116115
#[instrument(level = tracing::Level::DEBUG, skip(self), err(Debug))]
117116
fn create_wd_tree(&self) -> Result<Tree> {
118-
let gix_repo = gix::open_opts(
117+
use bstr::ByteSlice;
118+
use gix::dir::walk::EmissionMode;
119+
use gix::status;
120+
use gix::status::plumbing::index_as_worktree::{Change, EntryStatus};
121+
use gix::status::tree_index::TrackRenames;
122+
123+
let repo = gix::open_opts(
119124
self.path(),
120125
gix::open::Options::default().permissions(gix::open::Permissions {
121126
config: gix::open::permissions::Config {
@@ -126,86 +131,171 @@ impl RepositoryExt for git2::Repository {
126131
..Default::default()
127132
}),
128133
)?;
129-
let (mut pipeline, index) = gix_repo.filter_pipeline(None)?;
130-
let mut tree_update_builder = git2::build::TreeUpdateBuilder::new();
131-
132-
let worktree_path = self.workdir().context("Could not find worktree path")?;
133-
134-
let statuses = self.statuses(Some(
135-
StatusOptions::new()
136-
.renames_from_rewrites(false)
137-
.renames_head_to_index(false)
138-
.renames_index_to_workdir(false)
139-
.include_untracked(true)
140-
.recurse_untracked_dirs(true),
141-
))?;
142-
143-
// Truth table for upsert/remove:
144-
// | HEAD Tree -> Index | Index -> Worktree | Action |
145-
// | add | delete | no-action |
146-
// | modify | delete | remove |
147-
// | | delete | remove |
148-
// | delete | | remove |
149-
// | delete | add | upsert |
150-
// | add | | upsert |
151-
// | | add | upsert |
152-
// | add | modify | upsert |
153-
// | modify | modify | upsert |
154-
155-
let mut buf = Vec::with_capacity(1024);
156-
for status_entry in &statuses {
157-
let status = status_entry.status();
158-
let path = status_entry.path().context("Failed to get path")?;
159-
let path = Path::new(path);
160-
161-
if status.is_index_new() && status.is_wt_deleted() {
162-
// This is a no-op
163-
} else if (status.is_index_deleted() && !status.is_wt_new()) || status.is_wt_deleted() {
164-
tree_update_builder.remove(path);
165-
} else {
166-
let file_path = worktree_path.join(path).to_owned();
167-
168-
if file_path.is_symlink() {
169-
let resolved_path = file_path.read_link()?;
170-
let path_str = resolved_path
171-
.to_str()
172-
.context("Failed to convert path to str")?;
173-
174-
let blob = self.blob(path_str.as_bytes())?;
175-
tree_update_builder.upsert(path, blob, git2::FileMode::Link);
176-
} else if let io::Result::Ok(file) = std::fs::File::open(&file_path) {
177-
// We might have an entry for a file that does not exist on disk,
178-
// like in the case of a file conflict.
179-
let file_for_git = pipeline.convert_to_git(file, path, &index)?;
180-
let data = match file_for_git {
181-
ToGitOutcome::Unchanged(mut file) => {
182-
buf.clear();
183-
std::io::copy(&mut file, &mut buf)?;
184-
&buf
185-
}
186-
ToGitOutcome::Buffer(buf) => buf,
187-
ToGitOutcome::Process(mut read) => {
188-
buf.clear();
189-
std::io::copy(&mut read, &mut buf)?;
190-
&buf
191-
}
134+
let (mut pipeline, index) = repo.filter_pipeline(None)?;
135+
let workdir = repo.work_dir().context("Need non-bare repository")?;
136+
let mut head_tree_editor = repo.edit_tree(repo.head_tree_id()?)?;
137+
let status_changes = repo
138+
.status(gix::progress::Discard)?
139+
.tree_index_track_renames(TrackRenames::Disabled)
140+
.index_worktree_rewrites(None)
141+
.index_worktree_submodules(None)
142+
.index_worktree_options_mut(|opts| {
143+
if let Some(opts) = opts.dirwalk_options.as_mut() {
144+
opts.set_emit_ignored(None)
145+
.set_emit_pruned(false)
146+
.set_emit_tracked(false)
147+
.set_emit_untracked(EmissionMode::Matching)
148+
.set_emit_collapsed(None);
149+
}
150+
})
151+
.into_iter(None)?;
152+
153+
let mut worktreepaths_changed = HashSet::new();
154+
for change in status_changes {
155+
let change = change?;
156+
match change {
157+
status::Item::TreeIndex(gix::diff::index::Change::Deletion {
158+
location, ..
159+
}) => {
160+
// These changes play second fiddle - they are overwritten by worktree-changes,
161+
// or we ensure we don't overwrite them, as we may arrive out of order.
162+
if !worktreepaths_changed.contains(location.as_bstr()) {
163+
head_tree_editor.remove(location.as_ref())?;
164+
}
165+
}
166+
status::Item::TreeIndex(
167+
gix::diff::index::Change::Addition {
168+
location,
169+
entry_mode,
170+
id,
171+
..
172+
}
173+
| gix::diff::index::Change::Modification {
174+
location,
175+
entry_mode,
176+
id,
177+
..
178+
},
179+
) => {
180+
if let Some(entry_mode) = entry_mode
181+
.to_tree_entry_mode()
182+
// These changes play second fiddle - they are overwritten by worktree-changes,
183+
// or we ensure we don't overwrite them, as we may arrive out of order.
184+
.filter(|_| !worktreepaths_changed.contains(location.as_bstr()))
185+
{
186+
head_tree_editor.upsert(
187+
location.as_ref(),
188+
entry_mode.kind(),
189+
id.as_ref(),
190+
)?;
191+
}
192+
}
193+
status::Item::IndexWorktree(gix::status::index_worktree::Item::Modification {
194+
rela_path,
195+
status: EntryStatus::Change(Change::Removed),
196+
..
197+
}) => {
198+
head_tree_editor.remove(rela_path.as_bstr())?;
199+
worktreepaths_changed.insert(rela_path);
200+
}
201+
// Modified or untracked files are unconditionally added as blobs.
202+
// Note that this implementation will re-read the whole blob even on type-change
203+
status::Item::IndexWorktree(
204+
gix::status::index_worktree::Item::Modification {
205+
rela_path,
206+
status:
207+
EntryStatus::Change(Change::Type | Change::Modification { .. })
208+
| EntryStatus::IntentToAdd,
209+
..
210+
}
211+
| gix::status::index_worktree::Item::DirectoryContents {
212+
entry:
213+
gix::dir::Entry {
214+
rela_path,
215+
status: gix::dir::entry::Status::Untracked,
216+
..
217+
},
218+
..
219+
},
220+
) => {
221+
let rela_path_as_path = gix::path::from_bstr(&rela_path);
222+
let path = workdir.join(&rela_path_as_path);
223+
let Ok(md) = std::fs::symlink_metadata(&path) else {
224+
continue;
192225
};
193-
let blob_id = self.blob(data)?;
194-
195-
let file_type = if is_executable(&file_path.metadata()?) {
196-
git2::FileMode::BlobExecutable
226+
let (id, kind) = if md.is_symlink() {
227+
let target = std::fs::read_link(&path).with_context(|| {
228+
format!(
229+
"Failed to read link at '{}' for adding to the object database",
230+
path.display()
231+
)
232+
})?;
233+
let id = repo.write_blob(gix::path::into_bstr(target).as_bytes())?;
234+
(id, gix::object::tree::EntryKind::Link)
235+
} else if md.is_file() {
236+
let file = std::fs::File::open(&path).with_context(|| {
237+
format!(
238+
"Could not open file at '{}' for adding it to the object database",
239+
path.display()
240+
)
241+
})?;
242+
let file_for_git =
243+
pipeline.convert_to_git(file, rela_path_as_path.as_ref(), &index)?;
244+
let id = match file_for_git {
245+
ToGitOutcome::Unchanged(mut file) => {
246+
repo.write_blob_stream(&mut file)?
247+
}
248+
ToGitOutcome::Buffer(buf) => repo.write_blob(buf)?,
249+
ToGitOutcome::Process(mut read) => repo.write_blob_stream(&mut read)?,
250+
};
251+
252+
let kind = if gix::fs::is_executable(&md) {
253+
gix::object::tree::EntryKind::BlobExecutable
254+
} else {
255+
gix::object::tree::EntryKind::Blob
256+
};
257+
(id, kind)
197258
} else {
198-
git2::FileMode::Blob
259+
// This is probably a type-change to something we can't track. Instead of keeping
260+
// what's in `HEAD^{tree}` we remove the entry.
261+
head_tree_editor.remove(rela_path.as_bstr())?;
262+
worktreepaths_changed.insert(rela_path);
263+
continue;
199264
};
200265

201-
tree_update_builder.upsert(path, blob_id, file_type);
266+
head_tree_editor.upsert(rela_path.as_bstr(), kind, id)?;
267+
worktreepaths_changed.insert(rela_path);
268+
}
269+
status::Item::IndexWorktree(gix::status::index_worktree::Item::Rewrite {
270+
..
271+
})
272+
| status::Item::TreeIndex(gix::diff::index::Change::Rewrite { .. }) => {
273+
unreachable!("disabled")
202274
}
275+
status::Item::IndexWorktree(
276+
gix::status::index_worktree::Item::Modification {
277+
status:
278+
EntryStatus::Conflict(_)
279+
| EntryStatus::NeedsUpdate(_)
280+
| EntryStatus::Change(Change::SubmoduleModification(_)),
281+
..
282+
}
283+
| gix::status::index_worktree::Item::DirectoryContents {
284+
entry:
285+
gix::dir::Entry {
286+
status:
287+
gix::dir::entry::Status::Tracked
288+
| gix::dir::entry::Status::Pruned
289+
| gix::dir::entry::Status::Ignored(_),
290+
..
291+
},
292+
..
293+
},
294+
) => {}
203295
}
204296
}
205297

206-
let head_tree = self.head()?.peel_to_tree()?;
207-
let tree_oid = tree_update_builder.create_updated(self, &head_tree)?;
208-
298+
let tree_oid = gix_to_git2_oid(head_tree_editor.write()?);
209299
Ok(self.find_tree(tree_oid)?)
210300
}
211301

crates/gitbutler-repo/tests/create_wd_tree.rs

Lines changed: 36 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -177,16 +177,19 @@ mod head_upsert_truthtable {
177177

178178
// | modify | modify | upsert |
179179
#[test]
180-
fn index_modify_worktree_modify() -> anyhow::Result<()> {
180+
fn index_modify_worktree_modify_racy_git() -> anyhow::Result<()> {
181181
let test = TestingRepository::open_with_initial_commit(&[("file1.txt", "content1")]);
182182

183-
std::fs::write(test.tempdir.path().join("file1.txt"), "content2")?;
183+
let file_path = test.tempdir.path().join("file1.txt");
184+
std::fs::write(&file_path, "content2")?;
184185

185186
let mut index = test.repository.index()?;
186187
index.add_path(Path::new("file1.txt"))?;
187188
index.write()?;
188189

189-
std::fs::write(test.tempdir.path().join("file1.txt"), "content3")?;
190+
// This change is made within the same second, so if racy-git isn't handled correctly,
191+
// this change won't be seen.
192+
std::fs::write(file_path, "content3")?;
190193

191194
let tree: git2::Tree = test.repository.create_wd_tree()?;
192195

@@ -196,6 +199,27 @@ mod head_upsert_truthtable {
196199
"#);
197200
Ok(())
198201
}
202+
203+
// | modify | | upsert |
204+
#[test]
205+
fn index_modify() -> anyhow::Result<()> {
206+
let test = TestingRepository::open_with_initial_commit(&[("file1.txt", "content1")]);
207+
208+
let file_path = test.tempdir.path().join("file1.txt");
209+
std::fs::write(&file_path, "content2")?;
210+
211+
let mut index = test.repository.index()?;
212+
index.add_path(Path::new("file1.txt"))?;
213+
index.write()?;
214+
215+
let tree: git2::Tree = test.repository.create_wd_tree()?;
216+
217+
insta::assert_snapshot!(visualize_git2_tree(tree.id(), &test.repository), @r#"
218+
f87e9ef
219+
└── file1.txt:100644:db00fd6 "content2"
220+
"#);
221+
Ok(())
222+
}
199223
}
200224

201225
#[test]
@@ -398,7 +422,11 @@ fn tracked_file_becomes_directory_in_worktree() -> anyhow::Result<()> {
398422
std::fs::write(worktree_path.join("file"), "content in directory")?;
399423

400424
let tree: git2::Tree = test.repository.create_wd_tree().unwrap();
401-
insta::assert_snapshot!(visualize_git2_tree(tree.id(), &test.repository), @r"");
425+
insta::assert_snapshot!(visualize_git2_tree(tree.id(), &test.repository), @r#"
426+
8b80519
427+
└── soon-directory:df6d699
428+
└── file:100644:dadf628 "content in directory"
429+
"#);
402430
Ok(())
403431
}
404432

@@ -413,7 +441,10 @@ fn tracked_directory_becomes_file_in_worktree() -> anyhow::Result<()> {
413441
std::fs::write(worktree_path, "content")?;
414442

415443
let tree: git2::Tree = test.repository.create_wd_tree().unwrap();
416-
insta::assert_snapshot!(visualize_git2_tree(tree.id(), &test.repository), @r"");
444+
insta::assert_snapshot!(visualize_git2_tree(tree.id(), &test.repository), @r#"
445+
637be29
446+
└── soon-file:100644:6b584e8 "content"
447+
"#);
417448
Ok(())
418449
}
419450

0 commit comments

Comments
 (0)