Skip to content

Commit 7252d9a

Browse files
ttaylorr and gitster
authored and committed
pseudo-merge: implement support for finding existing merges
This patch implements support for reusing existing pseudo-merge commits when writing bitmaps when there is an existing pseudo-merge bitmap which has exactly the same set of parents as one that we are about to write.

Note that unstable pseudo-merges are likely to change between consecutive repacks, and so are generally poor candidates for reuse. However, stable pseudo-merges (see the configuration option 'bitmapPseudoMerge.<name>.stableThreshold') are by definition unlikely to change between runs (as they represent long-running branches).

Because there is no index from a *set* of pseudo-merge parents to a matching pseudo-merge bitmap, we have to construct the bitmap corresponding to the set of parents for each pending pseudo-merge commit and see if a matching bitmap exists.

This is technically quadratic in the number of pseudo-merges, but is OK in practice for a couple of reasons:

  - non-matching pseudo-merge bitmaps are rejected quickly as soon as they differ in a single bit

  - already-matched pseudo-merge bitmaps are discarded from subsequent rounds of search

  - the number of pseudo-merges is generally small, even for large repositories

In order to do this, implement (a) a function that finds a matching pseudo-merge given some uncompressed bitset describing its parents, (b) a function that computes the bitset of parents for a given pseudo-merge commit, and (c) call that function before computing the set of reachable objects for some pending pseudo-merge.

Signed-off-by: Taylor Blau <[email protected]>
Signed-off-by: Junio C Hamano <[email protected]>
1 parent 94c1add commit 7252d9a

File tree

6 files changed

+174
-2
lines changed

6 files changed

+174
-2
lines changed

pack-bitmap-write.c

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,10 @@
1919
#include "tree-walk.h"
2020
#include "pseudo-merge.h"
2121
#include "oid-array.h"
22+
#include "config.h"
23+
#include "alloc.h"
24+
#include "refs.h"
25+
#include "strmap.h"
2226

2327
struct bitmapped_commit {
2428
struct commit *commit;
@@ -465,6 +469,7 @@ static int fill_bitmap_tree(struct bitmap_writer *writer,
465469
}
466470

467471
static int reused_bitmaps_nr;
472+
static int reused_pseudo_merge_bitmaps_nr;
468473

469474
static int fill_bitmap_commit(struct bitmap_writer *writer,
470475
struct bb_commit *ent,
@@ -490,7 +495,7 @@ static int fill_bitmap_commit(struct bitmap_writer *writer,
490495
struct bitmap *remapped = bitmap_new();
491496

492497
if (commit->object.flags & BITMAP_PSEUDO_MERGE)
493-
old = NULL;
498+
old = pseudo_merge_bitmap_for_commit(old_bitmap, c);
494499
else
495500
old = bitmap_for_commit(old_bitmap, c);
496501
/*
@@ -501,7 +506,10 @@ static int fill_bitmap_commit(struct bitmap_writer *writer,
501506
if (old && !rebuild_bitmap(mapping, old, remapped)) {
502507
bitmap_or(ent->bitmap, remapped);
503508
bitmap_free(remapped);
504-
reused_bitmaps_nr++;
509+
if (commit->object.flags & BITMAP_PSEUDO_MERGE)
510+
reused_pseudo_merge_bitmaps_nr++;
511+
else
512+
reused_bitmaps_nr++;
505513
continue;
506514
}
507515
bitmap_free(remapped);
@@ -631,6 +639,9 @@ int bitmap_writer_build(struct bitmap_writer *writer,
631639
the_repository);
632640
trace2_data_intmax("pack-bitmap-write", the_repository,
633641
"building_bitmaps_reused", reused_bitmaps_nr);
642+
trace2_data_intmax("pack-bitmap-write", the_repository,
643+
"building_bitmaps_pseudo_merge_reused",
644+
reused_pseudo_merge_bitmaps_nr);
634645

635646
stop_progress(&writer->progress);
636647

pack-bitmap.c

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1316,6 +1316,37 @@ static struct bitmap *find_boundary_objects(struct bitmap_index *bitmap_git,
13161316
return cb.base;
13171317
}
13181318

1319+
struct ewah_bitmap *pseudo_merge_bitmap_for_commit(struct bitmap_index *bitmap_git,
1320+
struct commit *commit)
1321+
{
1322+
struct commit_list *p;
1323+
struct bitmap *parents;
1324+
struct pseudo_merge *match = NULL;
1325+
1326+
if (!bitmap_git->pseudo_merges.nr)
1327+
return NULL;
1328+
1329+
parents = bitmap_new();
1330+
1331+
for (p = commit->parents; p; p = p->next) {
1332+
int pos = bitmap_position(bitmap_git, &p->item->object.oid);
1333+
if (pos < 0 || pos >= bitmap_num_objects(bitmap_git))
1334+
goto done;
1335+
1336+
bitmap_set(parents, pos);
1337+
}
1338+
1339+
match = pseudo_merge_for_parents(&bitmap_git->pseudo_merges,
1340+
parents);
1341+
1342+
done:
1343+
bitmap_free(parents);
1344+
if (match)
1345+
return pseudo_merge_bitmap(&bitmap_git->pseudo_merges, match);
1346+
1347+
return NULL;
1348+
}
1349+
13191350
static void unsatisfy_all_pseudo_merges(struct bitmap_index *bitmap_git)
13201351
{
13211352
uint32_t i;
@@ -2809,6 +2840,7 @@ void free_bitmap_index(struct bitmap_index *b)
28092840
*/
28102841
close_midx_revindex(b->midx);
28112842
}
2843+
free_pseudo_merge_map(&b->pseudo_merges);
28122844
free(b);
28132845
}
28142846

pack-bitmap.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,8 @@ int rebuild_bitmap(const uint32_t *reposition,
142142
struct bitmap *dest);
143143
struct ewah_bitmap *bitmap_for_commit(struct bitmap_index *bitmap_git,
144144
struct commit *commit);
145+
struct ewah_bitmap *pseudo_merge_bitmap_for_commit(struct bitmap_index *bitmap_git,
146+
struct commit *commit);
145147
void bitmap_writer_select_commits(struct bitmap_writer *writer,
146148
struct commit **indexed_commits,
147149
unsigned int indexed_commits_nr);

pseudo-merge.c

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -699,3 +699,58 @@ int cascade_pseudo_merges(const struct pseudo_merge_map *pm,
699699

700700
return ret;
701701
}
702+
703+
struct pseudo_merge *pseudo_merge_for_parents(const struct pseudo_merge_map *pm,
704+
struct bitmap *parents)
705+
{
706+
struct pseudo_merge *match = NULL;
707+
size_t i;
708+
709+
if (!pm->nr)
710+
return NULL;
711+
712+
/*
713+
* NOTE: this loop is quadratic in the worst-case (where no
714+
* matching pseudo-merge bitmaps are found), but in practice
715+
* this is OK for a few reasons:
716+
*
717+
* - Rejecting pseudo-merge bitmaps that do not match the
718+
* given commit is done quickly (i.e. `bitmap_equals_ewah()`
719+
* returns early when we know the two bitmaps aren't equal).
720+
*
721+
* - Already matched pseudo-merge bitmaps (which we track with
722+
* the `->satisfied` bit here) are skipped as potential
723+
* candidates.
724+
*
725+
* - The number of pseudo-merges should be small (in the
726+
* hundreds for most repositories).
727+
*
728+
* If in the future this semi-quadratic behavior does become a
729+
* problem, another approach would be to keep track of which
730+
* pseudo-merges are still "viable" after enumerating the
731+
* pseudo-merge commit's parents:
732+
*
733+
* - A pseudo-merge bitmap becomes non-viable when the bit(s)
734+
* corresponding to one or more parent(s) of the given
735+
* commit are not set in a candidate pseudo-merge's commits
736+
* bitmap.
737+
*
738+
* - After processing all bits, enumerate the remaining set of
739+
* viable pseudo-merge bitmaps, and check that their
740+
* popcount() matches the number of parents in the given
741+
* commit.
742+
*/
743+
for (i = 0; i < pm->nr; i++) {
744+
struct pseudo_merge *candidate = use_pseudo_merge(pm, &pm->v[i]);
745+
if (!candidate || candidate->satisfied)
746+
continue;
747+
if (!bitmap_equals_ewah(parents, candidate->commits))
748+
continue;
749+
750+
match = candidate;
751+
match->satisfied = 1;
752+
break;
753+
}
754+
755+
return match;
756+
}

pseudo-merge.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -206,4 +206,11 @@ int cascade_pseudo_merges(const struct pseudo_merge_map *pm,
206206
struct bitmap *result,
207207
struct bitmap *roots);
208208

209+
/*
210+
* Returns a pseudo-merge which contains the exact set of commits
211+
* listed in the "parents" bitmap, or NULL if none could be found.
212+
*/
213+
struct pseudo_merge *pseudo_merge_for_parents(const struct pseudo_merge_map *pm,
214+
struct bitmap *parents);
215+
209216
#endif

t/t5333-pseudo-merge-bitmaps.sh

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,10 @@ test_pseudo_merges_cascades () {
2222
test_trace2_data bitmap pseudo_merges_cascades "$1"
2323
}
2424

25+
test_pseudo_merges_reused () {
26+
test_trace2_data pack-bitmap-write building_bitmaps_pseudo_merge_reused "$1"
27+
}
28+
2529
tag_everything () {
2630
git rev-list --all --no-object-names >in &&
2731
perl -lne '
@@ -325,4 +329,65 @@ test_expect_success 'pseudo-merge overlap stale traversal' '
325329
)
326330
'
327331

332+
test_expect_success 'pseudo-merge reuse' '
333+
git init pseudo-merge-reuse &&
334+
(
335+
cd pseudo-merge-reuse &&
336+
337+
stable="1641013200" && # 2022-01-01
338+
unstable="1672549200" && # 2023-01-01
339+
340+
GIT_COMMITTER_DATE="$stable +0000" &&
341+
export GIT_COMMITTER_DATE &&
342+
test_commit_bulk --notick 128 &&
343+
GIT_COMMITTER_DATE="$unstable +0000" &&
344+
export GIT_COMMITTER_DATE &&
345+
test_commit_bulk --notick 128 &&
346+
347+
tag_everything &&
348+
349+
git \
350+
-c bitmapPseudoMerge.test.pattern="refs/tags/" \
351+
-c bitmapPseudoMerge.test.maxMerges=1 \
352+
-c bitmapPseudoMerge.test.threshold=now \
353+
-c bitmapPseudoMerge.test.stableThreshold=$(($unstable - 1)) \
354+
-c bitmapPseudoMerge.test.stableSize=512 \
355+
repack -adb &&
356+
357+
test_pseudo_merges >merges &&
358+
test_line_count = 2 merges &&
359+
360+
test_pseudo_merge_commits 0 >stable-oids.before &&
361+
test_pseudo_merge_commits 1 >unstable-oids.before &&
362+
363+
: >trace2.txt &&
364+
GIT_TRACE2_EVENT=$PWD/trace2.txt git \
365+
-c bitmapPseudoMerge.test.pattern="refs/tags/" \
366+
-c bitmapPseudoMerge.test.maxMerges=2 \
367+
-c bitmapPseudoMerge.test.threshold=now \
368+
-c bitmapPseudoMerge.test.stableThreshold=$(($unstable - 1)) \
369+
-c bitmapPseudoMerge.test.stableSize=512 \
370+
repack -adb &&
371+
372+
test_pseudo_merges_reused 1 <trace2.txt &&
373+
374+
test_pseudo_merges >merges &&
375+
test_line_count = 3 merges &&
376+
377+
test_pseudo_merge_commits 0 >stable-oids.after &&
378+
for i in 1 2
379+
do
380+
test_pseudo_merge_commits $i || return 1
381+
done >unstable-oids.after &&
382+
383+
sort -u <stable-oids.before >expect &&
384+
sort -u <stable-oids.after >actual &&
385+
test_cmp expect actual &&
386+
387+
sort -u <unstable-oids.before >expect &&
388+
sort -u <unstable-oids.after >actual &&
389+
test_cmp expect actual
390+
)
391+
'
392+
328393
test_done

0 commit comments

Comments
 (0)