Skip to content

Commit 16f9845

Browse files
committed
more changeset-related performance improvements through parallelization
1 parent 9b09094 commit 16f9845

File tree

3 files changed

+168
-23
lines changed

3 files changed

+168
-23
lines changed

Cargo.lock

Lines changed: 31 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

crates/but-workspace/Cargo.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,8 @@ itertools = "0.14"
3030
url = { version = "2.5.4", features = ["serde"] }
3131
md5 = "0.8.0"
3232
tracing.workspace = true
33+
# For SPMC channel
34+
flume = "0.11.1"
3335

3436
[dev-dependencies]
3537
but-testsupport.workspace = true

crates/but-workspace/src/changeset.rs

Lines changed: 135 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -83,13 +83,18 @@ impl RefInfo {
8383
!prune
8484
});
8585

86+
let cost_info = (
87+
upstream_commits.len(),
88+
repo.index_or_empty()?.entries().len(),
89+
);
8690
let upstream_lut = create_similarity_lut(
8791
repo,
8892
upstream_commits.iter().filter_map(|id| {
8993
but_core::Commit::from_id(id.attach(repo))
9094
.map(ui::Commit::from)
9195
.ok()
9296
}),
97+
cost_info,
9398
expensive,
9499
)?;
95100

@@ -98,8 +103,12 @@ impl RefInfo {
98103
'next_stack: for stack in &mut self.stacks {
99104
for segment in &mut stack.segments {
100105
// At first, these are all commits that aren't also available by identity as local commits.
101-
let remote_lut =
102-
create_similarity_lut(repo, segment.commits_on_remote.iter(), expensive)?;
106+
let remote_lut = create_similarity_lut(
107+
repo,
108+
segment.commits_on_remote.iter(),
109+
cost_info,
110+
expensive,
111+
)?;
103112

104113
for local in segment
105114
// top-to-bottom
@@ -281,31 +290,56 @@ fn lookup_similar<'a>(
281290
fn create_similarity_lut(
282291
repo: &Repository,
283292
commits: impl Iterator<Item = impl Borrow<ui::Commit>>,
293+
(max_commits, num_tracked_files): (usize, usize),
284294
expensive: bool,
285295
) -> anyhow::Result<Identity> {
296+
// Experimentally determined throughput of a modern CPU, based on 100 diffs/s at 90k index entries.
297+
// Make this smaller to get more threads even with lower amounts of work.
298+
const CPU_PERF: usize = 10_000_000 / 5 /* start parallelizing earlier */;
299+
let aproximate_cpu_seconds = (max_commits * num_tracked_files) / CPU_PERF;
300+
let num_threads = aproximate_cpu_seconds
301+
.max(1)
302+
.min(std::thread::available_parallelism()?.get());
303+
286304
let mut similarity_lut = HashMap::<Identifier, gix::ObjectId>::new();
287-
{
288-
let mut ambiguous_commits = HashSet::<Identifier>::new();
289-
let mut insert_or_expell_ambiguous = |k: Identifier, v: gix::ObjectId| {
290-
if ambiguous_commits.contains(&k) {
291-
return;
292-
}
293-
match similarity_lut.entry(k) {
294-
Entry::Occupied(ambiguous) => {
295-
if matches!(ambiguous.key(), Identifier::ChangesetId(_)) {
296-
// the most expensive option should never be ambiguous (which can happen with merges),
297-
// so just keep the (typically top-most/first) commit with a changeset ID instead.
298-
return;
299-
}
300-
ambiguous_commits.insert(ambiguous.key().clone());
301-
ambiguous.remove();
302-
}
303-
Entry::Vacant(entry) => {
304-
entry.insert(v);
305+
let mut ambiguous_commits = HashSet::<Identifier>::new();
306+
307+
let mut insert_or_expell_ambiguous = |k: Identifier, v: gix::ObjectId| {
308+
if ambiguous_commits.contains(&k) {
309+
return;
310+
}
311+
match similarity_lut.entry(k) {
312+
Entry::Occupied(ambiguous) => {
313+
if matches!(ambiguous.key(), Identifier::ChangesetId(_)) {
314+
// the most expensive option should never be ambiguous (which can happen with merges),
315+
// so just keep the (typically top-most/first) commit with a changeset ID instead.
316+
return;
305317
}
318+
ambiguous_commits.insert(ambiguous.key().clone());
319+
ambiguous.remove();
306320
}
307-
};
308-
for commit in commits {
321+
Entry::Vacant(entry) => {
322+
entry.insert(v);
323+
}
324+
}
325+
};
326+
327+
let should_stop = |start: std::time::Instant, commit_idx: usize| {
328+
const MAX_DURATION: std::time::Duration = std::time::Duration::from_secs(1);
329+
let out_of_time = start.elapsed() > MAX_DURATION;
330+
if out_of_time {
331+
tracing::warn!(
332+
"Stopping expensive changeset computation after {}s and {commit_idx} diffs computed ({throughput:02} diffs/s)",
333+
MAX_DURATION.as_secs(),
334+
throughput = commit_idx as f32 / start.elapsed().as_secs_f32(),
335+
);
336+
}
337+
out_of_time
338+
};
339+
340+
if num_threads <= 1 || !expensive {
341+
let mut expensive = expensive.then(std::time::Instant::now);
342+
for (idx, commit) in commits.enumerate() {
309343
let commit = commit.borrow();
310344
if let Some(change_id) = &commit.change_id {
311345
insert_or_expell_ambiguous(Identifier::ChangeId(change_id.clone()), commit.id);
@@ -317,16 +351,94 @@ fn create_similarity_lut(
317351
},
318352
commit.id,
319353
);
320-
if expensive {
354+
if let Some(start) = expensive {
321355
let Some(changeset_id) =
322356
id_for_tree_diff(repo, commit.parent_ids.first().cloned(), commit.id)?
323357
else {
324358
continue;
325359
};
326360
insert_or_expell_ambiguous(Identifier::ChangesetId(changeset_id), commit.id);
361+
362+
if should_stop(start, idx) {
363+
expensive = None;
364+
}
365+
}
366+
}
367+
} else {
368+
let (in_tx, out_rx) = {
369+
let (in_tx, in_rx) = flume::unbounded();
370+
let (out_tx, out_rx) = flume::unbounded();
371+
for tid in 0..num_threads {
372+
std::thread::Builder::new()
373+
.name(format!("GitButler::compute-changeset({tid})"))
374+
.spawn({
375+
let in_rx = in_rx.clone();
376+
let out_tx = out_tx.clone();
377+
let repo = repo.clone().into_sync();
378+
move || -> anyhow::Result<()> {
379+
let mut repo = repo.to_thread_local();
380+
repo.object_cache_size_if_unset(
381+
repo.compute_object_cache_size_for_tree_diffs(
382+
&*repo.index_or_empty()?,
383+
),
384+
);
385+
for (idx, lhs, rhs) in in_rx {
386+
if out_tx
387+
.send(
388+
id_for_tree_diff(&repo, lhs, rhs)
389+
.map(|opt| opt.map(|cs_id| (idx, cs_id, rhs))),
390+
)
391+
.is_err()
392+
{
393+
break;
394+
}
395+
}
396+
Ok(())
397+
}
398+
})?;
399+
}
400+
(in_tx, out_rx)
401+
};
402+
403+
assert!(
404+
expensive,
405+
"BUG: multi-threading is only for expensive checks"
406+
);
407+
for (idx, commit) in commits.enumerate() {
408+
let commit = commit.borrow();
409+
if let Some(change_id) = &commit.change_id {
410+
insert_or_expell_ambiguous(Identifier::ChangeId(change_id.clone()), commit.id);
411+
}
412+
insert_or_expell_ambiguous(
413+
Identifier::CommitData {
414+
author: commit.author.clone().into(),
415+
message: commit.message.clone(),
416+
},
417+
commit.id,
418+
);
419+
420+
in_tx
421+
.send((idx, commit.parent_ids.first().cloned(), commit.id))
422+
.ok();
423+
}
424+
drop(in_tx);
425+
426+
let start = std::time::Instant::now();
427+
let mut max_idx = 0;
428+
for res in out_rx {
429+
let Some((idx, changeset_id, commit_id)) = res? else {
430+
continue;
431+
};
432+
433+
insert_or_expell_ambiguous(Identifier::ChangesetId(changeset_id), commit_id);
434+
435+
max_idx = max_idx.max(idx);
436+
if should_stop(start, max_idx) {
437+
break;
327438
}
328439
}
329440
}
441+
330442
Ok(similarity_lut)
331443
}
332444

0 commit comments

Comments
 (0)