Skip to content

Commit 94fa323

Browse files
committed
Skip big files in create_wd_tree()
1 parent 36e97b7 commit 94fa323

File tree

9 files changed

+209
-123
lines changed

9 files changed

+209
-123
lines changed

crates/gitbutler-branch-actions/src/branch_manager/branch_creation.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ use gitbutler_error::error::Marker;
1313
use gitbutler_oplog::SnapshotExt;
1414
use gitbutler_oxidize::GixRepositoryExt;
1515
use gitbutler_project::access::WorktreeWritePermission;
16+
use gitbutler_project::AUTO_TRACK_LIMIT_BYTES;
1617
use gitbutler_reference::{Refname, RemoteRefname};
1718
use gitbutler_repo::logging::{LogUntil, RepositoryExt as _};
1819
use gitbutler_repo::{
@@ -305,7 +306,7 @@ impl BranchManager<'_> {
305306

306307
// We don't support having two branches applied that conflict with each other
307308
{
308-
let uncommited_changes_tree_id = repo.create_wd_tree()?.id();
309+
let uncommited_changes_tree_id = repo.create_wd_tree(AUTO_TRACK_LIMIT_BYTES)?.id();
309310
let gix_repo = self.ctx.gix_repository_for_merging_non_persisting()?;
310311
let merges_cleanly = gix_repo
311312
.merges_cleanly_compat(

crates/gitbutler-branch-actions/src/virtual.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ use gitbutler_oxidize::{
2626
git2_signature_to_gix_signature, git2_to_gix_object_id, gix_to_git2_oid, GixRepositoryExt,
2727
};
2828
use gitbutler_project::access::WorktreeWritePermission;
29+
use gitbutler_project::AUTO_TRACK_LIMIT_BYTES;
2930
use gitbutler_reference::{normalize_branch_name, Refname, RemoteRefname};
3031
use gitbutler_repo::{
3132
logging::{LogUntil, RepositoryExt as _},
@@ -1089,7 +1090,7 @@ pub fn is_remote_branch_mergeable(
10891090

10901091
let base_tree = find_base_tree(ctx.repo(), &branch_commit, &target_commit)?;
10911092

1092-
let wd_tree = ctx.repo().create_wd_tree()?;
1093+
let wd_tree = ctx.repo().create_wd_tree(AUTO_TRACK_LIMIT_BYTES)?;
10931094

10941095
let branch_tree = branch_commit.tree().context("failed to find branch tree")?;
10951096
let gix_repo_in_memory = ctx.gix_repository_for_merging()?.with_object_memory();

crates/gitbutler-edit-mode/src/lib.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ use gitbutler_operating_modes::{
2020
};
2121
use gitbutler_oxidize::{git2_to_gix_object_id, gix_to_git2_index, GixRepositoryExt};
2222
use gitbutler_project::access::{WorktreeReadPermission, WorktreeWritePermission};
23+
use gitbutler_project::AUTO_TRACK_LIMIT_BYTES;
2324
use gitbutler_reference::{ReferenceName, Refname};
2425
use gitbutler_repo::{rebase::cherry_rebase, RepositoryExt};
2526
use gitbutler_repo::{signature, SignaturePurpose};
@@ -234,7 +235,7 @@ pub(crate) fn save_and_return_to_workspace(
234235
let parents = commit.parents().collect::<Vec<_>>();
235236

236237
// Recommit commit
237-
let tree = repository.create_wd_tree()?;
238+
let tree = repository.create_wd_tree(AUTO_TRACK_LIMIT_BYTES)?;
238239

239240
let (_, committer) = repository.signatures()?;
240241
let commit_headers = commit

crates/gitbutler-oplog/src/oplog.rs

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ use gitbutler_oxidize::{
2020
};
2121
use gitbutler_project::{
2222
access::{WorktreeReadPermission, WorktreeWritePermission},
23-
Project,
23+
Project, AUTO_TRACK_LIMIT_BYTES,
2424
};
2525
use gitbutler_repo::RepositoryExt;
2626
use gitbutler_repo::SignaturePurpose;
@@ -30,8 +30,6 @@ use gix::object::tree::diff::Change;
3030
use gix::prelude::ObjectIdExt;
3131
use tracing::instrument;
3232

33-
const SNAPSHOT_FILE_LIMIT_BYTES: u64 = 32 * 1024 * 1024;
34-
3533
/// The Oplog allows for creating snapshots of the current state of the project as well as restoring to a previous snapshot.
3634
/// Snapshots include the state of the working directory as well as all additional GitButler state (e.g. virtual branches, conflict state).
3735
/// The data is stored as git trees in the following shape:
@@ -312,7 +310,7 @@ impl OplogExt for Project {
312310
let old_wd_tree_id = tree_from_applied_vbranches(&gix_repo, commit.parent(0)?.id())?;
313311
let old_wd_tree = repo.find_tree(old_wd_tree_id)?;
314312

315-
repo.ignore_large_files_in_diffs(SNAPSHOT_FILE_LIMIT_BYTES)?;
313+
repo.ignore_large_files_in_diffs(AUTO_TRACK_LIMIT_BYTES)?;
316314

317315
let mut diff_opts = git2::DiffOptions::new();
318316
diff_opts
@@ -602,7 +600,7 @@ fn restore_snapshot(
602600
let workdir_tree_id = tree_from_applied_vbranches(&gix_repo, snapshot_commit_id)?;
603601
let workdir_tree = repo.find_tree(workdir_tree_id)?;
604602

605-
repo.ignore_large_files_in_diffs(SNAPSHOT_FILE_LIMIT_BYTES)?;
603+
repo.ignore_large_files_in_diffs(AUTO_TRACK_LIMIT_BYTES)?;
606604

607605
// Define the checkout builder
608606
let mut checkout_builder = git2::build::CheckoutBuilder::new();
@@ -739,7 +737,7 @@ fn lines_since_snapshot(project: &Project, repo: &git2::Repository) -> Result<us
739737
// This looks at the diff between the tree of the currently selected as 'default' branch (where new changes go)
740738
// and that same tree in the last snapshot. For some reason, comparing workdir to the workdir subtree from
741739
// the snapshot simply does not give us what we need here, so instead using tree to tree comparison.
742-
repo.ignore_large_files_in_diffs(SNAPSHOT_FILE_LIMIT_BYTES)?;
740+
repo.ignore_large_files_in_diffs(AUTO_TRACK_LIMIT_BYTES)?;
743741

744742
let oplog_state = OplogHandle::new(&project.gb_dir());
745743
let Some(oplog_commit_id) = oplog_state.oplog_head()? else {

crates/gitbutler-project/src/lib.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,3 +18,6 @@ pub fn configure_git2() {
1818
// These settings are only changed from `main` of applications.
1919
git2::opts::strict_object_creation(false);
2020
}
21+
22+
/// The maximum size of files to automatically start tracking, i.e. untracked files we pick up for tree-creation.
23+
pub const AUTO_TRACK_LIMIT_BYTES: u64 = 32 * 1024 * 1024;

crates/gitbutler-repo/src/repository_ext.rs

Lines changed: 95 additions & 83 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
use crate::Config;
22
use crate::SignaturePurpose;
33
use anyhow::{anyhow, bail, Context, Result};
4-
use bstr::BString;
4+
use bstr::{BStr, BString};
55
use git2::Tree;
66
use gitbutler_commit::commit_headers::CommitHeadersV2;
77
use gitbutler_config::git::{GbConfig, GitConfig};
@@ -12,6 +12,7 @@ use gitbutler_oxidize::{
1212
use gitbutler_reference::{Refname, RemoteRefname};
1313
use gix::filter::plumbing::pipeline::convert::ToGitOutcome;
1414
use gix::objs::WriteTo;
15+
use gix::status::index_worktree;
1516
use std::collections::HashSet;
1617
#[cfg(unix)]
1718
use std::os::unix::fs::PermissionsExt;
@@ -42,8 +43,15 @@ pub trait RepositoryExt {
4243
fn sign_buffer(&self, buffer: &[u8]) -> Result<BString>;
4344
fn checkout_tree_builder<'a>(&'a self, tree: &'a git2::Tree<'a>) -> CheckoutTreeBuidler<'a>;
4445
fn maybe_find_branch_by_refname(&self, name: &Refname) -> Result<Option<git2::Branch>>;
45-
/// Based on the index, add all data similar to `git add .` and create a tree from it, which is returned.
46-
fn create_wd_tree(&self) -> Result<Tree>;
46+
/// Add all untracked and modified files in the worktree to
47+
/// the object database, and create a tree from it.
48+
///
49+
/// Use `untracked_limit_in_bytes` to control the maximum file size for untracked files
50+
/// before we stop tracking them automatically. Set it to 0 to disable the limit.
51+
///
52+
/// It should also be noted that this will fail if run on an empty branch
53+
/// or if the HEAD branch has no commits.
54+
fn create_wd_tree(&self, untracked_limit_in_bytes: u64) -> Result<Tree>;
4755

4856
/// Returns the `gitbutler/workspace` branch if the head currently points to it, or fail otherwise.
4957
/// Use it before any modification to the repository, or extra defensively each time the
@@ -105,15 +113,8 @@ impl RepositoryExt for git2::Repository {
105113
Ok(branch)
106114
}
107115

108-
/// Add all untracked and modified files in the worktree to
109-
/// the object database, and create a tree from it.
110-
///
111-
/// Note that right now, it doesn't skip big files.
112-
///
113-
/// It should also be noted that this will fail if run on an empty branch
114-
/// or if the HEAD branch has no commits.
115-
#[instrument(level = tracing::Level::DEBUG, skip(self), err(Debug))]
116-
fn create_wd_tree(&self) -> Result<Tree> {
116+
#[instrument(level = tracing::Level::DEBUG, skip(self, untracked_limit_in_bytes), err(Debug))]
117+
fn create_wd_tree(&self, untracked_limit_in_bytes: u64) -> Result<Tree> {
117118
use bstr::ByteSlice;
118119
use gix::dir::walk::EmissionMode;
119120
use gix::status;
@@ -133,6 +134,57 @@ impl RepositoryExt for git2::Repository {
133134
)?;
134135
let (mut pipeline, index) = repo.filter_pipeline(None)?;
135136
let workdir = repo.work_dir().context("Need non-bare repository")?;
137+
// Writes the worktree entry at `rela_path` into the object database and records it in
// `head_tree_editor`.
//
// Returns `Ok(true)` if the editor was changed: the entry was upserted as a blob, executable
// blob or symlink, or it was removed because the path is neither a regular file nor a symlink.
// Returns `Ok(false)` if the path was skipped, either because its metadata could not be read
// or because it is larger than `untracked_limit_in_bytes`.
let mut added_worktree_file = |rela_path: &BStr,
                               head_tree_editor: &mut gix::object::tree::Editor<'_>|
 -> anyhow::Result<bool> {
    let rela_path_as_path = gix::path::from_bstr(rela_path);
    let path = workdir.join(&rela_path_as_path);
    // The file may have vanished since the status was computed; treat it as "nothing to add".
    let Ok(md) = std::fs::symlink_metadata(&path) else {
        return Ok(false);
    };
    // A limit of 0 disables the size check, as documented on `create_wd_tree()`.
    // Without the explicit zero-guard every non-empty file would be skipped instead.
    if untracked_limit_in_bytes != 0 && md.len() > untracked_limit_in_bytes {
        return Ok(false);
    }
    let (id, kind) = if md.is_symlink() {
        // Symlinks are stored as a blob holding the link target.
        let target = std::fs::read_link(&path).with_context(|| {
            format!(
                "Failed to read link at '{}' for adding to the object database",
                path.display()
            )
        })?;
        let id = repo.write_blob(gix::path::into_bstr(target).as_bytes())?;
        (id, gix::object::tree::EntryKind::Link)
    } else if md.is_file() {
        let file = std::fs::File::open(&path).with_context(|| {
            format!(
                "Could not open file at '{}' for adding it to the object database",
                path.display()
            )
        })?;
        // Run the worktree content through the filter pipeline before writing it as a blob.
        let file_for_git = pipeline.convert_to_git(file, rela_path_as_path.as_ref(), &index)?;
        let id = match file_for_git {
            ToGitOutcome::Unchanged(mut file) => repo.write_blob_stream(&mut file)?,
            ToGitOutcome::Buffer(buf) => repo.write_blob(buf)?,
            ToGitOutcome::Process(mut read) => repo.write_blob_stream(&mut read)?,
        };

        let kind = if gix::fs::is_executable(&md) {
            gix::object::tree::EntryKind::BlobExecutable
        } else {
            gix::object::tree::EntryKind::Blob
        };
        (id, kind)
    } else {
        // This is probably a type-change to something we can't track. Instead of keeping
        // what's in `HEAD^{tree}` we remove the entry.
        head_tree_editor.remove(rela_path)?;
        return Ok(true);
    };

    head_tree_editor.upsert(rela_path, kind, id)?;
    Ok(true)
};
136188
let mut head_tree_editor = repo.edit_tree(repo.head_tree_id()?)?;
137189
let status_changes = repo
138190
.status(gix::progress::Discard)?
@@ -154,6 +206,8 @@ impl RepositoryExt for git2::Repository {
154206
.into_iter(None)?;
155207

156208
let mut worktreepaths_changed = HashSet::new();
209+
// We have to apply untracked items last, but don't have ordering here so impose it ourselves.
210+
let mut untracked_items = Vec::new();
157211
for change in status_changes {
158212
let change = change?;
159213
match change {
@@ -193,7 +247,7 @@ impl RepositoryExt for git2::Repository {
193247
)?;
194248
}
195249
}
196-
status::Item::IndexWorktree(gix::status::index_worktree::Item::Modification {
250+
status::Item::IndexWorktree(index_worktree::Item::Modification {
197251
rela_path,
198252
status: EntryStatus::Change(Change::Removed),
199253
..
@@ -203,73 +257,29 @@ impl RepositoryExt for git2::Repository {
203257
}
204258
// modified files are added as blob unless skipped for exceeding the size limit; untracked files are queued and added after this loop.
205259
// Note that this implementation will re-read the whole blob even on type-change
206-
status::Item::IndexWorktree(
207-
gix::status::index_worktree::Item::Modification {
208-
rela_path,
209-
status:
210-
EntryStatus::Change(Change::Type | Change::Modification { .. })
211-
| EntryStatus::IntentToAdd,
212-
..
213-
}
214-
| gix::status::index_worktree::Item::DirectoryContents {
215-
entry:
216-
gix::dir::Entry {
217-
rela_path,
218-
status: gix::dir::entry::Status::Untracked,
219-
..
220-
},
221-
..
222-
},
223-
) => {
224-
let rela_path_as_path = gix::path::from_bstr(&rela_path);
225-
let path = workdir.join(&rela_path_as_path);
226-
let Ok(md) = std::fs::symlink_metadata(&path) else {
227-
continue;
228-
};
229-
let (id, kind) = if md.is_symlink() {
230-
let target = std::fs::read_link(&path).with_context(|| {
231-
format!(
232-
"Failed to read link at '{}' for adding to the object database",
233-
path.display()
234-
)
235-
})?;
236-
let id = repo.write_blob(gix::path::into_bstr(target).as_bytes())?;
237-
(id, gix::object::tree::EntryKind::Link)
238-
} else if md.is_file() {
239-
let file = std::fs::File::open(&path).with_context(|| {
240-
format!(
241-
"Could not open file at '{}' for adding it to the object database",
242-
path.display()
243-
)
244-
})?;
245-
let file_for_git =
246-
pipeline.convert_to_git(file, rela_path_as_path.as_ref(), &index)?;
247-
let id = match file_for_git {
248-
ToGitOutcome::Unchanged(mut file) => {
249-
repo.write_blob_stream(&mut file)?
250-
}
251-
ToGitOutcome::Buffer(buf) => repo.write_blob(buf)?,
252-
ToGitOutcome::Process(mut read) => repo.write_blob_stream(&mut read)?,
253-
};
254-
255-
let kind = if gix::fs::is_executable(&md) {
256-
gix::object::tree::EntryKind::BlobExecutable
257-
} else {
258-
gix::object::tree::EntryKind::Blob
259-
};
260-
(id, kind)
261-
} else {
262-
// This is probably a type-change to something we can't track. Instead of keeping
263-
// what's in `HEAD^{tree}` we remove the entry.
264-
head_tree_editor.remove(rela_path.as_bstr())?;
260+
status::Item::IndexWorktree(index_worktree::Item::Modification {
261+
rela_path,
262+
status:
263+
EntryStatus::Change(Change::Type | Change::Modification { .. })
264+
| EntryStatus::IntentToAdd,
265+
..
266+
}) => {
267+
if added_worktree_file(rela_path.as_ref(), &mut head_tree_editor)? {
265268
worktreepaths_changed.insert(rela_path);
266-
continue;
267-
};
268-
269-
head_tree_editor.upsert(rela_path.as_bstr(), kind, id)?;
270-
worktreepaths_changed.insert(rela_path);
269+
}
271270
}
272-
status::Item::IndexWorktree(gix::status::index_worktree::Item::Modification {
271+
status::Item::IndexWorktree(index_worktree::Item::DirectoryContents {
272+
entry:
273+
gix::dir::Entry {
274+
rela_path,
275+
status: gix::dir::entry::Status::Untracked,
276+
..
277+
},
278+
..
279+
}) => {
280+
untracked_items.push(rela_path);
281+
}
282+
status::Item::IndexWorktree(index_worktree::Item::Modification {
273283
rela_path,
274284
status: EntryStatus::Change(Change::SubmoduleModification(change)),
275285
..
@@ -283,18 +293,16 @@ impl RepositoryExt for git2::Repository {
283293
worktreepaths_changed.insert(rela_path);
284294
}
285295
}
286-
status::Item::IndexWorktree(gix::status::index_worktree::Item::Rewrite {
287-
..
288-
})
296+
status::Item::IndexWorktree(index_worktree::Item::Rewrite { .. })
289297
| status::Item::TreeIndex(gix::diff::index::Change::Rewrite { .. }) => {
290298
unreachable!("disabled")
291299
}
292300
status::Item::IndexWorktree(
293-
gix::status::index_worktree::Item::Modification {
301+
index_worktree::Item::Modification {
294302
status: EntryStatus::Conflict(_) | EntryStatus::NeedsUpdate(_),
295303
..
296304
}
297-
| gix::status::index_worktree::Item::DirectoryContents {
305+
| index_worktree::Item::DirectoryContents {
298306
entry:
299307
gix::dir::Entry {
300308
status:
@@ -309,6 +317,10 @@ impl RepositoryExt for git2::Repository {
309317
}
310318
}
311319

320+
for rela_path in untracked_items {
321+
added_worktree_file(rela_path.as_ref(), &mut head_tree_editor)?;
322+
}
323+
312324
let tree_oid = gix_to_git2_oid(head_tree_editor.write()?);
313325
Ok(self.find_tree(tree_oid)?)
314326
}

0 commit comments

Comments
 (0)