Skip to content

Commit 10f36bc

Browse files
committed
Make diff-generation faster
- don't look at too many upstream commits - use plumbing for diff-tree for less overhead Still TBD: - multi-threading
1 parent cbfb19d commit 10f36bc

File tree

3 files changed

+81
-41
lines changed

3 files changed

+81
-41
lines changed

crates/but-graph/src/projection/workspace.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -473,12 +473,12 @@ impl Graph {
473473
(out, stopped_at)
474474
}
475475

476-
/// Visit the ancestry of `start` along the first parents, itself included, until `stop` returns `true`.
476+
/// Visit the ancestry of `start` along the first parents, itself excluded, until `stop` returns `true`.
477477
/// Also return the segment that we stopped at.
478478
/// **Important**: `stop` is not called with `start`, this is a feature.
479479
///
480480
/// Note that the traversal assumes as well-segmented graph without cycles.
481-
fn visit_segments_along_first_parent_until(
481+
fn visit_segments_downward_along_first_parent_until(
482482
&self,
483483
start: SegmentIndex,
484484
mut stop: impl FnMut(&Segment) -> bool,
@@ -507,7 +507,7 @@ impl Graph {
507507
mut find: impl FnMut(&Segment) -> Option<T>,
508508
) -> Option<T> {
509509
let mut out = None;
510-
self.visit_segments_along_first_parent_until(start, |s| {
510+
self.visit_segments_downward_along_first_parent_until(start, |s| {
511511
if let Some(res) = find(s) {
512512
out = Some(res);
513513
true

crates/but-testing/src/command/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -466,7 +466,7 @@ pub fn operating_mode(args: &super::Args) -> anyhow::Result<()> {
466466
}
467467

468468
pub fn ref_info(args: &super::Args, ref_name: Option<&str>, expensive: bool) -> anyhow::Result<()> {
469-
let (repo, project) = repo_and_maybe_project(args, RepositoryOpenMode::General)?;
469+
let (repo, project) = repo_and_maybe_project(args, RepositoryOpenMode::Merge)?;
470470
let opts = but_workspace::ref_info::Options {
471471
stack_commit_limit: 0,
472472
expensive_commit_info: expensive,

crates/but-workspace/src/changeset.rs

Lines changed: 77 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,6 @@
44
//! This property allows changeset IDs to be used to determine if two different commits, or sets of commits,
55
//! represent the same change.
66
7-
use std::{
8-
borrow::Borrow,
9-
collections::{HashMap, HashSet, hash_map::Entry},
10-
};
11-
127
use crate::{
138
RefInfo,
149
ref_info::{
@@ -18,9 +13,15 @@ use crate::{
1813
ui::PushStatus,
1914
};
2015
use bstr::BString;
21-
use but_core::{ChangeState, TreeChange, TreeStatus, commit::TreeKind};
16+
use but_core::{ChangeState, commit::TreeKind};
17+
use gix::diff::tree::recorder::Change;
2218
use gix::{ObjectId, Repository, object::tree::EntryKind, prelude::ObjectIdExt};
2319
use itertools::Itertools;
20+
use std::ops::Deref;
21+
use std::{
22+
borrow::Borrow,
23+
collections::{HashMap, HashSet, hash_map::Entry},
24+
};
2425

2526
/// The ID of a changeset, calculated as Git hash for convenience.
2627
type ChangesetID = gix::ObjectId;
@@ -68,9 +69,12 @@ impl RefInfo {
6869
self.compute_pushstatus();
6970
return Ok(());
7071
};
72+
let lower_bound_generation = self.lower_bound.map(|sidx| graph[sidx].generation);
7173
graph.visit_all_segments_until(target_tip, but_graph::petgraph::Direction::Outgoing, |s| {
7274
let prune = true;
73-
if Some(s.id) == self.lower_bound {
75+
if Some(s.id) == self.lower_bound
76+
|| lower_bound_generation.is_some_and(|generation| s.generation > generation)
77+
{
7478
return prune;
7579
}
7680
for c in &s.commits {
@@ -348,15 +352,19 @@ fn id_for_tree_diff(
348352
// TODO(perf): use plumbing directly to avoid resource-cache overhead.
349353
// consider parallelization
350354
// really needs caching to be practical, in-memory might suffice for now.
351-
let changes = repo.diff_tree_to_tree(
352-
lhs_tree.as_ref(),
353-
&rhs_tree,
354-
*gix::diff::Options::default()
355-
.track_path()
356-
// Rewrite tracking isn't needed for unique IDs and doesn't alter the validity,
357-
// but would cost time, making it useless.
358-
.track_rewrites(None),
355+
356+
let empty_tree = repo.empty_tree();
357+
let mut state = Default::default();
358+
let mut recorder = gix::diff::tree::Recorder::default()
359+
.track_location(Some(gix::diff::tree::recorder::Location::Path));
360+
gix::diff::tree(
361+
gix::objs::TreeRefIter::from_bytes(&lhs_tree.unwrap_or(empty_tree).data),
362+
gix::objs::TreeRefIter::from_bytes(&rhs_tree.data),
363+
&mut state,
364+
repo.objects.deref(),
365+
&mut recorder,
359366
)?;
367+
let changes = recorder.records;
360368
if changes.is_empty() {
361369
return Ok(None);
362370
}
@@ -366,41 +374,73 @@ fn id_for_tree_diff(
366374

367375
// We rely on the diff order, it's consistent as rewrites are disabled.
368376
for c in changes {
369-
if c.entry_mode().is_tree() {
377+
let (entry_mode, location) = match &c {
378+
Change::Addition {
379+
entry_mode, path, ..
380+
}
381+
| Change::Deletion {
382+
entry_mode, path, ..
383+
}
384+
| Change::Modification {
385+
entry_mode, path, ..
386+
} => (*entry_mode, path),
387+
};
388+
if entry_mode.is_tree() {
370389
continue;
371390
}
372-
// For simplicity, use this type.
373-
let c = TreeChange::from(c);
374391
// must hash all fields, even if None for unambiguous hashes.
375-
hash.update(&c.path);
376-
match c.status {
377-
TreeStatus::Addition {
378-
state,
379-
// Ignore as untracked files can't happen with tree/tree diffs
380-
is_untracked: _,
392+
hash.update(location);
393+
match c {
394+
Change::Addition {
395+
entry_mode, oid, ..
381396
} => {
382397
hash.update(b"A");
383-
hash_change_state(&mut hash, state)
398+
hash_change_state(
399+
&mut hash,
400+
ChangeState {
401+
id: oid,
402+
kind: entry_mode.kind(),
403+
},
404+
)
384405
}
385-
TreeStatus::Deletion { previous_state } => {
406+
Change::Deletion {
407+
entry_mode, oid, ..
408+
} => {
386409
hash.update(b"D");
387-
hash_change_state(&mut hash, previous_state)
410+
hash_change_state(
411+
&mut hash,
412+
ChangeState {
413+
id: oid,
414+
kind: entry_mode.kind(),
415+
},
416+
);
388417
}
389-
TreeStatus::Modification {
390-
previous_state,
391-
state,
392-
// Ignore as it's derived
393-
flags: _,
418+
Change::Modification {
419+
previous_entry_mode,
420+
previous_oid,
421+
entry_mode,
422+
oid,
423+
..
394424
} => {
395425
hash.update(b"M");
396-
hash_change_state(&mut hash, previous_state);
397-
hash_change_state(&mut hash, state);
398-
}
399-
TreeStatus::Rename { .. } => {
400-
unreachable!("disabled in prior configuration")
426+
hash_change_state(
427+
&mut hash,
428+
ChangeState {
429+
id: previous_oid,
430+
kind: previous_entry_mode.kind(),
431+
},
432+
);
433+
hash_change_state(
434+
&mut hash,
435+
ChangeState {
436+
id: oid,
437+
kind: entry_mode.kind(),
438+
},
439+
);
401440
}
402441
}
403442
}
443+
404444
Ok(Some(hash.try_finalize()?))
405445
}
406446

0 commit comments

Comments
 (0)