Skip to content

Commit 36e97b7

Browse files
committed
create_wd_tree() uses gix-status
It's the same idea as it was with `git2`, but it's faster as `gix` uses more threads.

Further improvements:

* handle dir-to-file and file-to-dir conversions
* pin current behaviour more with additional tests
* add submodule support
1 parent e5248b1 commit 36e97b7

File tree

3 files changed

+323
-85
lines changed

3 files changed

+323
-85
lines changed

crates/gitbutler-repo/src/repository_ext.rs

Lines changed: 183 additions & 79 deletions
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,7 @@ use crate::Config;
22
use crate::SignaturePurpose;
33
use anyhow::{anyhow, bail, Context, Result};
44
use bstr::BString;
5-
use git2::{StatusOptions, Tree};
5+
use git2::Tree;
66
use gitbutler_commit::commit_headers::CommitHeadersV2;
77
use gitbutler_config::git::{GbConfig, GitConfig};
88
use gitbutler_error::error::Code;
@@ -11,14 +11,13 @@ use gitbutler_oxidize::{
1111
};
1212
use gitbutler_reference::{Refname, RemoteRefname};
1313
use gix::filter::plumbing::pipeline::convert::ToGitOutcome;
14-
use gix::fs::is_executable;
1514
use gix::objs::WriteTo;
16-
use std::io;
15+
use std::collections::HashSet;
1716
#[cfg(unix)]
1817
use std::os::unix::fs::PermissionsExt;
1918
#[cfg(windows)]
2019
use std::os::windows::process::CommandExt;
21-
use std::{io::Write, path::Path, process::Stdio, str};
20+
use std::{io::Write, process::Stdio, str};
2221
use tracing::instrument;
2322

2423
/// Extension trait for `git2::Repository`.
@@ -106,16 +105,22 @@ impl RepositoryExt for git2::Repository {
106105
Ok(branch)
107106
}
108107

109-
/// Note that this will add all untracked and modified files in the worktree to
108+
/// Add all untracked and modified files in the worktree to
110109
/// the object database, and create a tree from it.
111110
///
112111
/// Note that right now, it doesn't skip big files.
113112
///
114113
/// It should also be noted that this will fail if run on an empty branch
115-
/// or if the HEAD branch has no commits
114+
/// or if the HEAD branch has no commits.
116115
#[instrument(level = tracing::Level::DEBUG, skip(self), err(Debug))]
117116
fn create_wd_tree(&self) -> Result<Tree> {
118-
let gix_repo = gix::open_opts(
117+
use bstr::ByteSlice;
118+
use gix::dir::walk::EmissionMode;
119+
use gix::status;
120+
use gix::status::plumbing::index_as_worktree::{Change, EntryStatus};
121+
use gix::status::tree_index::TrackRenames;
122+
123+
let repo = gix::open_opts(
119124
self.path(),
120125
gix::open::Options::default().permissions(gix::open::Permissions {
121126
config: gix::open::permissions::Config {
@@ -126,86 +131,185 @@ impl RepositoryExt for git2::Repository {
126131
..Default::default()
127132
}),
128133
)?;
129-
let (mut pipeline, index) = gix_repo.filter_pipeline(None)?;
130-
let mut tree_update_builder = git2::build::TreeUpdateBuilder::new();
131-
132-
let worktree_path = self.workdir().context("Could not find worktree path")?;
133-
134-
let statuses = self.statuses(Some(
135-
StatusOptions::new()
136-
.renames_from_rewrites(false)
137-
.renames_head_to_index(false)
138-
.renames_index_to_workdir(false)
139-
.include_untracked(true)
140-
.recurse_untracked_dirs(true),
141-
))?;
142-
143-
// Truth table for upsert/remove:
144-
// | HEAD Tree -> Index | Index -> Worktree | Action |
145-
// | add | delete | no-action |
146-
// | modify | delete | remove |
147-
// | | delete | remove |
148-
// | delete | | remove |
149-
// | delete | add | upsert |
150-
// | add | | upsert |
151-
// | | add | upsert |
152-
// | add | modify | upsert |
153-
// | modify | modify | upsert |
154-
155-
let mut buf = Vec::with_capacity(1024);
156-
for status_entry in &statuses {
157-
let status = status_entry.status();
158-
let path = status_entry.path().context("Failed to get path")?;
159-
let path = Path::new(path);
160-
161-
if status.is_index_new() && status.is_wt_deleted() {
162-
// This is a no-op
163-
} else if (status.is_index_deleted() && !status.is_wt_new()) || status.is_wt_deleted() {
164-
tree_update_builder.remove(path);
165-
} else {
166-
let file_path = worktree_path.join(path).to_owned();
167-
168-
if file_path.is_symlink() {
169-
let resolved_path = file_path.read_link()?;
170-
let path_str = resolved_path
171-
.to_str()
172-
.context("Failed to convert path to str")?;
173-
174-
let blob = self.blob(path_str.as_bytes())?;
175-
tree_update_builder.upsert(path, blob, git2::FileMode::Link);
176-
} else if let io::Result::Ok(file) = std::fs::File::open(&file_path) {
177-
// We might have an entry for a file that does not exist on disk,
178-
// like in the case of a file conflict.
179-
let file_for_git = pipeline.convert_to_git(file, path, &index)?;
180-
let data = match file_for_git {
181-
ToGitOutcome::Unchanged(mut file) => {
182-
buf.clear();
183-
std::io::copy(&mut file, &mut buf)?;
184-
&buf
185-
}
186-
ToGitOutcome::Buffer(buf) => buf,
187-
ToGitOutcome::Process(mut read) => {
188-
buf.clear();
189-
std::io::copy(&mut read, &mut buf)?;
190-
&buf
191-
}
134+
let (mut pipeline, index) = repo.filter_pipeline(None)?;
135+
let workdir = repo.work_dir().context("Need non-bare repository")?;
136+
let mut head_tree_editor = repo.edit_tree(repo.head_tree_id()?)?;
137+
let status_changes = repo
138+
.status(gix::progress::Discard)?
139+
.tree_index_track_renames(TrackRenames::Disabled)
140+
.index_worktree_rewrites(None)
141+
.index_worktree_submodules(gix::status::Submodule::Given {
142+
ignore: gix::submodule::config::Ignore::Dirty,
143+
check_dirty: true,
144+
})
145+
.index_worktree_options_mut(|opts| {
146+
if let Some(opts) = opts.dirwalk_options.as_mut() {
147+
opts.set_emit_ignored(None)
148+
.set_emit_pruned(false)
149+
.set_emit_tracked(false)
150+
.set_emit_untracked(EmissionMode::Matching)
151+
.set_emit_collapsed(None);
152+
}
153+
})
154+
.into_iter(None)?;
155+
156+
let mut worktreepaths_changed = HashSet::new();
157+
for change in status_changes {
158+
let change = change?;
159+
match change {
160+
status::Item::TreeIndex(gix::diff::index::Change::Deletion {
161+
location, ..
162+
}) => {
163+
// These changes play second fiddle - they are overwritten by worktree-changes,
164+
// or we assure we don't overwrite, as we may arrive out of order.
165+
if !worktreepaths_changed.contains(location.as_bstr()) {
166+
head_tree_editor.remove(location.as_ref())?;
167+
}
168+
}
169+
status::Item::TreeIndex(
170+
gix::diff::index::Change::Addition {
171+
location,
172+
entry_mode,
173+
id,
174+
..
175+
}
176+
| gix::diff::index::Change::Modification {
177+
location,
178+
entry_mode,
179+
id,
180+
..
181+
},
182+
) => {
183+
if let Some(entry_mode) = entry_mode
184+
.to_tree_entry_mode()
185+
// These changes play second fiddle - they are overwritten by worktree-changes,
186+
// or we assure we don't overwrite, as we may arrive out of order.
187+
.filter(|_| !worktreepaths_changed.contains(location.as_bstr()))
188+
{
189+
head_tree_editor.upsert(
190+
location.as_ref(),
191+
entry_mode.kind(),
192+
id.as_ref(),
193+
)?;
194+
}
195+
}
196+
status::Item::IndexWorktree(gix::status::index_worktree::Item::Modification {
197+
rela_path,
198+
status: EntryStatus::Change(Change::Removed),
199+
..
200+
}) => {
201+
head_tree_editor.remove(rela_path.as_bstr())?;
202+
worktreepaths_changed.insert(rela_path);
203+
}
204+
// modified or untracked files are unconditionally added as blob.
205+
// Note that this implementation will re-read the whole blob even on type-change
206+
status::Item::IndexWorktree(
207+
gix::status::index_worktree::Item::Modification {
208+
rela_path,
209+
status:
210+
EntryStatus::Change(Change::Type | Change::Modification { .. })
211+
| EntryStatus::IntentToAdd,
212+
..
213+
}
214+
| gix::status::index_worktree::Item::DirectoryContents {
215+
entry:
216+
gix::dir::Entry {
217+
rela_path,
218+
status: gix::dir::entry::Status::Untracked,
219+
..
220+
},
221+
..
222+
},
223+
) => {
224+
let rela_path_as_path = gix::path::from_bstr(&rela_path);
225+
let path = workdir.join(&rela_path_as_path);
226+
let Ok(md) = std::fs::symlink_metadata(&path) else {
227+
continue;
192228
};
193-
let blob_id = self.blob(data)?;
194-
195-
let file_type = if is_executable(&file_path.metadata()?) {
196-
git2::FileMode::BlobExecutable
229+
let (id, kind) = if md.is_symlink() {
230+
let target = std::fs::read_link(&path).with_context(|| {
231+
format!(
232+
"Failed to read link at '{}' for adding to the object database",
233+
path.display()
234+
)
235+
})?;
236+
let id = repo.write_blob(gix::path::into_bstr(target).as_bytes())?;
237+
(id, gix::object::tree::EntryKind::Link)
238+
} else if md.is_file() {
239+
let file = std::fs::File::open(&path).with_context(|| {
240+
format!(
241+
"Could not open file at '{}' for adding it to the object database",
242+
path.display()
243+
)
244+
})?;
245+
let file_for_git =
246+
pipeline.convert_to_git(file, rela_path_as_path.as_ref(), &index)?;
247+
let id = match file_for_git {
248+
ToGitOutcome::Unchanged(mut file) => {
249+
repo.write_blob_stream(&mut file)?
250+
}
251+
ToGitOutcome::Buffer(buf) => repo.write_blob(buf)?,
252+
ToGitOutcome::Process(mut read) => repo.write_blob_stream(&mut read)?,
253+
};
254+
255+
let kind = if gix::fs::is_executable(&md) {
256+
gix::object::tree::EntryKind::BlobExecutable
257+
} else {
258+
gix::object::tree::EntryKind::Blob
259+
};
260+
(id, kind)
197261
} else {
198-
git2::FileMode::Blob
262+
// This is probably a type-change to something we can't track. Instead of keeping
263+
// what's in `HEAD^{tree}` we remove the entry.
264+
head_tree_editor.remove(rela_path.as_bstr())?;
265+
worktreepaths_changed.insert(rela_path);
266+
continue;
199267
};
200268

201-
tree_update_builder.upsert(path, blob_id, file_type);
269+
head_tree_editor.upsert(rela_path.as_bstr(), kind, id)?;
270+
worktreepaths_changed.insert(rela_path);
202271
}
272+
status::Item::IndexWorktree(gix::status::index_worktree::Item::Modification {
273+
rela_path,
274+
status: EntryStatus::Change(Change::SubmoduleModification(change)),
275+
..
276+
}) => {
277+
if let Some(possibly_changed_head_commit) = change.checked_out_head_id {
278+
head_tree_editor.upsert(
279+
rela_path.as_bstr(),
280+
gix::object::tree::EntryKind::Commit,
281+
possibly_changed_head_commit,
282+
)?;
283+
worktreepaths_changed.insert(rela_path);
284+
}
285+
}
286+
status::Item::IndexWorktree(gix::status::index_worktree::Item::Rewrite {
287+
..
288+
})
289+
| status::Item::TreeIndex(gix::diff::index::Change::Rewrite { .. }) => {
290+
unreachable!("disabled")
291+
}
292+
status::Item::IndexWorktree(
293+
gix::status::index_worktree::Item::Modification {
294+
status: EntryStatus::Conflict(_) | EntryStatus::NeedsUpdate(_),
295+
..
296+
}
297+
| gix::status::index_worktree::Item::DirectoryContents {
298+
entry:
299+
gix::dir::Entry {
300+
status:
301+
gix::dir::entry::Status::Tracked
302+
| gix::dir::entry::Status::Pruned
303+
| gix::dir::entry::Status::Ignored(_),
304+
..
305+
},
306+
..
307+
},
308+
) => {}
203309
}
204310
}
205311

206-
let head_tree = self.head()?.peel_to_tree()?;
207-
let tree_oid = tree_update_builder.create_updated(self, &head_tree)?;
208-
312+
let tree_oid = gix_to_git2_oid(head_tree_editor.write()?);
209313
Ok(self.find_tree(tree_oid)?)
210314
}
211315

0 commit comments

Comments
 (0)