@@ -83,13 +83,18 @@ impl RefInfo {
83
83
!prune
84
84
} ) ;
85
85
86
+ let cost_info = (
87
+ upstream_commits. len ( ) ,
88
+ repo. index_or_empty ( ) ?. entries ( ) . len ( ) ,
89
+ ) ;
86
90
let upstream_lut = create_similarity_lut (
87
91
repo,
88
92
upstream_commits. iter ( ) . filter_map ( |id| {
89
93
but_core:: Commit :: from_id ( id. attach ( repo) )
90
94
. map ( ui:: Commit :: from)
91
95
. ok ( )
92
96
} ) ,
97
+ cost_info,
93
98
expensive,
94
99
) ?;
95
100
@@ -98,8 +103,12 @@ impl RefInfo {
98
103
' next_stack: for stack in & mut self . stacks {
99
104
for segment in & mut stack. segments {
100
105
// At first, these are all commits that aren't also available by identity as local commits.
101
- let remote_lut =
102
- create_similarity_lut ( repo, segment. commits_on_remote . iter ( ) , expensive) ?;
106
+ let remote_lut = create_similarity_lut (
107
+ repo,
108
+ segment. commits_on_remote . iter ( ) ,
109
+ cost_info,
110
+ expensive,
111
+ ) ?;
103
112
104
113
for local in segment
105
114
// top-to-bottom
@@ -281,31 +290,56 @@ fn lookup_similar<'a>(
281
290
fn create_similarity_lut (
282
291
repo : & Repository ,
283
292
commits : impl Iterator < Item = impl Borrow < ui:: Commit > > ,
293
+ ( max_commits, num_tracked_files) : ( usize , usize ) ,
284
294
expensive : bool ,
285
295
) -> anyhow:: Result < Identity > {
296
+ // experimental modern CPU perf, based on 100 diffs/s at 90k entries
297
+ // Make this smaller to get more threads even with lower amounts of work.
298
+ const CPU_PERF : usize = 10_000_000 / 5 /* start parallelizing earlier */ ;
299
+ let aproximate_cpu_seconds = ( max_commits * num_tracked_files) / CPU_PERF ;
300
+ let num_threads = aproximate_cpu_seconds
301
+ . max ( 1 )
302
+ . min ( std:: thread:: available_parallelism ( ) ?. get ( ) ) ;
303
+
286
304
let mut similarity_lut = HashMap :: < Identifier , gix:: ObjectId > :: new ( ) ;
287
- {
288
- let mut ambiguous_commits = HashSet :: < Identifier > :: new ( ) ;
289
- let mut insert_or_expell_ambiguous = |k : Identifier , v : gix:: ObjectId | {
290
- if ambiguous_commits. contains ( & k) {
291
- return ;
292
- }
293
- match similarity_lut. entry ( k) {
294
- Entry :: Occupied ( ambiguous) => {
295
- if matches ! ( ambiguous. key( ) , Identifier :: ChangesetId ( _) ) {
296
- // the most expensive option should never be ambiguous (which can happen with merges),
297
- // so just keep the (typically top-most/first) commit with a changeset ID instead.
298
- return ;
299
- }
300
- ambiguous_commits. insert ( ambiguous. key ( ) . clone ( ) ) ;
301
- ambiguous. remove ( ) ;
302
- }
303
- Entry :: Vacant ( entry) => {
304
- entry. insert ( v) ;
305
+ let mut ambiguous_commits = HashSet :: < Identifier > :: new ( ) ;
306
+
307
+ let mut insert_or_expell_ambiguous = |k : Identifier , v : gix:: ObjectId | {
308
+ if ambiguous_commits. contains ( & k) {
309
+ return ;
310
+ }
311
+ match similarity_lut. entry ( k) {
312
+ Entry :: Occupied ( ambiguous) => {
313
+ if matches ! ( ambiguous. key( ) , Identifier :: ChangesetId ( _) ) {
314
+ // the most expensive option should never be ambiguous (which can happen with merges),
315
+ // so just keep the (typically top-most/first) commit with a changeset ID instead.
316
+ return ;
305
317
}
318
+ ambiguous_commits. insert ( ambiguous. key ( ) . clone ( ) ) ;
319
+ ambiguous. remove ( ) ;
306
320
}
307
- } ;
308
- for commit in commits {
321
+ Entry :: Vacant ( entry) => {
322
+ entry. insert ( v) ;
323
+ }
324
+ }
325
+ } ;
326
+
327
+ let should_stop = |start : std:: time:: Instant , commit_idx : usize | {
328
+ const MAX_DURATION : std:: time:: Duration = std:: time:: Duration :: from_secs ( 1 ) ;
329
+ let out_of_time = start. elapsed ( ) > MAX_DURATION ;
330
+ if out_of_time {
331
+ tracing:: warn!(
332
+ "Stopping expensive changeset computation after {}s and {commit_idx} diffs computed ({throughput:02} diffs/s)" ,
333
+ MAX_DURATION . as_secs( ) ,
334
+ throughput = commit_idx as f32 / start. elapsed( ) . as_secs_f32( ) ,
335
+ ) ;
336
+ }
337
+ out_of_time
338
+ } ;
339
+
340
+ if num_threads <= 1 || !expensive {
341
+ let mut expensive = expensive. then ( std:: time:: Instant :: now) ;
342
+ for ( idx, commit) in commits. enumerate ( ) {
309
343
let commit = commit. borrow ( ) ;
310
344
if let Some ( change_id) = & commit. change_id {
311
345
insert_or_expell_ambiguous ( Identifier :: ChangeId ( change_id. clone ( ) ) , commit. id ) ;
@@ -317,16 +351,94 @@ fn create_similarity_lut(
317
351
} ,
318
352
commit. id ,
319
353
) ;
320
- if expensive {
354
+ if let Some ( start ) = expensive {
321
355
let Some ( changeset_id) =
322
356
id_for_tree_diff ( repo, commit. parent_ids . first ( ) . cloned ( ) , commit. id ) ?
323
357
else {
324
358
continue ;
325
359
} ;
326
360
insert_or_expell_ambiguous ( Identifier :: ChangesetId ( changeset_id) , commit. id ) ;
361
+
362
+ if should_stop ( start, idx) {
363
+ expensive = None ;
364
+ }
365
+ }
366
+ }
367
+ } else {
368
+ let ( in_tx, out_rx) = {
369
+ let ( in_tx, in_rx) = flume:: unbounded ( ) ;
370
+ let ( out_tx, out_rx) = flume:: unbounded ( ) ;
371
+ for tid in 0 ..num_threads {
372
+ std:: thread:: Builder :: new ( )
373
+ . name ( format ! ( "GitButler::compute-changeset({tid})" ) )
374
+ . spawn ( {
375
+ let in_rx = in_rx. clone ( ) ;
376
+ let out_tx = out_tx. clone ( ) ;
377
+ let repo = repo. clone ( ) . into_sync ( ) ;
378
+ move || -> anyhow:: Result < ( ) > {
379
+ let mut repo = repo. to_thread_local ( ) ;
380
+ repo. object_cache_size_if_unset (
381
+ repo. compute_object_cache_size_for_tree_diffs (
382
+ & * repo. index_or_empty ( ) ?,
383
+ ) ,
384
+ ) ;
385
+ for ( idx, lhs, rhs) in in_rx {
386
+ if out_tx
387
+ . send (
388
+ id_for_tree_diff ( & repo, lhs, rhs)
389
+ . map ( |opt| opt. map ( |cs_id| ( idx, cs_id, rhs) ) ) ,
390
+ )
391
+ . is_err ( )
392
+ {
393
+ break ;
394
+ }
395
+ }
396
+ Ok ( ( ) )
397
+ }
398
+ } ) ?;
399
+ }
400
+ ( in_tx, out_rx)
401
+ } ;
402
+
403
+ assert ! (
404
+ expensive,
405
+ "BUG: multi-threading is only for expensive checks"
406
+ ) ;
407
+ for ( idx, commit) in commits. enumerate ( ) {
408
+ let commit = commit. borrow ( ) ;
409
+ if let Some ( change_id) = & commit. change_id {
410
+ insert_or_expell_ambiguous ( Identifier :: ChangeId ( change_id. clone ( ) ) , commit. id ) ;
411
+ }
412
+ insert_or_expell_ambiguous (
413
+ Identifier :: CommitData {
414
+ author : commit. author . clone ( ) . into ( ) ,
415
+ message : commit. message . clone ( ) ,
416
+ } ,
417
+ commit. id ,
418
+ ) ;
419
+
420
+ in_tx
421
+ . send ( ( idx, commit. parent_ids . first ( ) . cloned ( ) , commit. id ) )
422
+ . ok ( ) ;
423
+ }
424
+ drop ( in_tx) ;
425
+
426
+ let start = std:: time:: Instant :: now ( ) ;
427
+ let mut max_idx = 0 ;
428
+ for res in out_rx {
429
+ let Some ( ( idx, changeset_id, commit_id) ) = res? else {
430
+ continue ;
431
+ } ;
432
+
433
+ insert_or_expell_ambiguous ( Identifier :: ChangesetId ( changeset_id) , commit_id) ;
434
+
435
+ max_idx = max_idx. max ( idx) ;
436
+ if should_stop ( start, max_idx) {
437
+ break ;
327
438
}
328
439
}
329
440
}
441
+
330
442
Ok ( similarity_lut)
331
443
}
332
444
0 commit comments