Skip to content

Commit f2ffc74

Browse files
committed
Merge branch 'tb/pack-bitmap-traversal-with-boundary'
The object traversal using reachability bitmap done by "pack-objects" has been tweaked to take advantage of the fact that using "boundary" commits as representative of all the uninteresting ones can save quite a lot of object enumeration.

* tb/pack-bitmap-traversal-with-boundary:
  pack-bitmap.c: use commit boundary during bitmap traversal
  pack-bitmap.c: extract `fill_in_bitmap()`
  object: add object_array initializer helper function
2 parents 4dd0469 + b0afdce commit f2ffc74

File tree

11 files changed

+284
-40
lines changed

11 files changed

+284
-40
lines changed

Documentation/config/feature.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,9 @@ feature.experimental::
1414
+
1515
* `fetch.negotiationAlgorithm=skipping` may improve fetch negotiation times by
1616
skipping more commits at a time, reducing the number of round trips.
17+
+
18+
* `pack.useBitmapBoundaryTraversal=true` may improve bitmap traversal times by
19+
walking fewer objects.
1720

1821
feature.manyFiles::
1922
Enable config options that optimize for repos with many files in the

Documentation/config/pack.txt

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,23 @@ pack.useBitmaps::
123123
true. You should not generally need to turn this off unless
124124
you are debugging pack bitmaps.
125125

126+
pack.useBitmapBoundaryTraversal::
127+
When true, Git will use an experimental algorithm for computing
128+
reachability queries with bitmaps. Instead of building up
129+
complete bitmaps for all of the negated tips and then OR-ing
130+
them together, consider negated tips with existing bitmaps as
131+
additive (i.e. OR-ing them into the result if they exist,
132+
ignoring them otherwise), and build up a bitmap at the boundary
133+
instead.
134+
+
135+
When using this algorithm, Git may include too many objects as a result
136+
of not opening up trees belonging to certain UNINTERESTING commits. This
137+
inexactness matches the non-bitmap traversal algorithm.
138+
+
139+
In many cases, this can provide a speed-up over the exact algorithm,
140+
particularly when there is poor bitmap coverage of the negated side of
141+
the query.
142+
126143
pack.useSparse::
127144
When true, git will default to using the '--sparse' option in
128145
'git pack-objects' when the '--revs' option is present. This

ci/run-build-and-tests.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ linux-TEST-vars)
2929
export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME=master
3030
export GIT_TEST_NO_WRITE_REV_INDEX=1
3131
export GIT_TEST_CHECKOUT_WORKERS=2
32+
export GIT_TEST_PACK_USE_BITMAP_BOUNDARY_TRAVERSAL=1
3233
;;
3334
linux-clang)
3435
export GIT_TEST_DEFAULT_HASH=sha1

object.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -356,6 +356,12 @@ void object_list_free(struct object_list **list)
356356
*/
357357
static char object_array_slopbuf[1];
358358

359+
void object_array_init(struct object_array *array)
360+
{
361+
struct object_array blank = OBJECT_ARRAY_INIT;
362+
memcpy(array, &blank, sizeof(*array));
363+
}
364+
359365
void add_object_array_with_path(struct object *obj, const char *name,
360366
struct object_array *array,
361367
unsigned mode, const char *path)

object.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,8 @@ struct object_array {
5858

5959
#define OBJECT_ARRAY_INIT { 0 }
6060

61+
/*
 * Initialize `array` at runtime to the same state that the
 * OBJECT_ARRAY_INIT static initializer produces.
 */
void object_array_init(struct object_array *array);
62+
6163
/*
6264
* object flag allocation:
6365
* revision.h: 0---------10 15 23------27

pack-bitmap.c

Lines changed: 202 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -1043,6 +1043,160 @@ static int add_commit_to_bitmap(struct bitmap_index *bitmap_git,
10431043
return 1;
10441044
}
10451045

1046+
static struct bitmap *fill_in_bitmap(struct bitmap_index *bitmap_git,
1047+
struct rev_info *revs,
1048+
struct bitmap *base,
1049+
struct bitmap *seen)
1050+
{
1051+
struct include_data incdata;
1052+
struct bitmap_show_data show_data;
1053+
1054+
if (!base)
1055+
base = bitmap_new();
1056+
1057+
incdata.bitmap_git = bitmap_git;
1058+
incdata.base = base;
1059+
incdata.seen = seen;
1060+
1061+
revs->include_check = should_include;
1062+
revs->include_check_obj = should_include_obj;
1063+
revs->include_check_data = &incdata;
1064+
1065+
if (prepare_revision_walk(revs))
1066+
die(_("revision walk setup failed"));
1067+
1068+
show_data.bitmap_git = bitmap_git;
1069+
show_data.base = base;
1070+
1071+
traverse_commit_list(revs, show_commit, show_object, &show_data);
1072+
1073+
revs->include_check = NULL;
1074+
revs->include_check_obj = NULL;
1075+
revs->include_check_data = NULL;
1076+
1077+
return base;
1078+
}
1079+
1080+
/*
 * Callback state for the boundary-finding walk driven by
 * find_boundary_objects():
 *
 *  - bitmap_git: the bitmap index being queried.
 *  - base:       the bitmap accumulated so far; show_boundary_commit()
 *                passes its address to add_commit_to_bitmap().
 *  - boundary:   commits seen with the BOUNDARY flag, collected by
 *                show_boundary_commit() and later used as fill-in tips.
 */
struct bitmap_boundary_cb {
1081+
struct bitmap_index *bitmap_git;
1082+
struct bitmap *base;
1083+
1084+
struct object_array boundary;
1085+
};
1086+
1087+
static void show_boundary_commit(struct commit *commit, void *_data)
1088+
{
1089+
struct bitmap_boundary_cb *data = _data;
1090+
1091+
if (commit->object.flags & BOUNDARY)
1092+
add_object_array(&commit->object, "", &data->boundary);
1093+
1094+
if (commit->object.flags & UNINTERESTING) {
1095+
if (bitmap_walk_contains(data->bitmap_git, data->base,
1096+
&commit->object.oid))
1097+
return;
1098+
1099+
add_commit_to_bitmap(data->bitmap_git, &data->base, commit);
1100+
}
1101+
}
1102+
1103+
/*
 * Object callback for the boundary walk.
 *
 * find_boundary_objects() turns off blob, tree and tag traversal before
 * it walks, so reaching this callback indicates a programming error.
 */
static void show_boundary_object(struct object *object, const char *name,
				 void *data)
{
	BUG("should not be called");
}
1108+
1109+
static struct bitmap *find_boundary_objects(struct bitmap_index *bitmap_git,
1110+
struct rev_info *revs,
1111+
struct object_list *roots)
1112+
{
1113+
struct bitmap_boundary_cb cb;
1114+
struct object_list *root;
1115+
unsigned int i;
1116+
unsigned int tmp_blobs, tmp_trees, tmp_tags;
1117+
int any_missing = 0;
1118+
1119+
cb.bitmap_git = bitmap_git;
1120+
cb.base = bitmap_new();
1121+
object_array_init(&cb.boundary);
1122+
1123+
revs->ignore_missing_links = 1;
1124+
1125+
/*
1126+
* OR in any existing reachability bitmaps among `roots` into
1127+
* `cb.base`.
1128+
*/
1129+
for (root = roots; root; root = root->next) {
1130+
struct object *object = root->item;
1131+
if (object->type != OBJ_COMMIT ||
1132+
bitmap_walk_contains(bitmap_git, cb.base, &object->oid))
1133+
continue;
1134+
1135+
if (add_commit_to_bitmap(bitmap_git, &cb.base,
1136+
(struct commit *)object))
1137+
continue;
1138+
1139+
any_missing = 1;
1140+
}
1141+
1142+
if (!any_missing)
1143+
goto cleanup;
1144+
1145+
tmp_blobs = revs->blob_objects;
1146+
tmp_trees = revs->tree_objects;
1147+
tmp_tags = revs->blob_objects;
1148+
revs->blob_objects = 0;
1149+
revs->tree_objects = 0;
1150+
revs->tag_objects = 0;
1151+
1152+
/*
1153+
* We didn't have complete coverage of the roots. First setup a
1154+
* revision walk to (a) OR in any bitmaps that are UNINTERESTING
1155+
* between the tips and boundary, and (b) record the boundary.
1156+
*/
1157+
trace2_region_enter("pack-bitmap", "boundary-prepare", the_repository);
1158+
if (prepare_revision_walk(revs))
1159+
die("revision walk setup failed");
1160+
trace2_region_leave("pack-bitmap", "boundary-prepare", the_repository);
1161+
1162+
trace2_region_enter("pack-bitmap", "boundary-traverse", the_repository);
1163+
revs->boundary = 1;
1164+
traverse_commit_list_filtered(revs,
1165+
show_boundary_commit,
1166+
show_boundary_object,
1167+
&cb, NULL);
1168+
revs->boundary = 0;
1169+
trace2_region_leave("pack-bitmap", "boundary-traverse", the_repository);
1170+
1171+
revs->blob_objects = tmp_blobs;
1172+
revs->tree_objects = tmp_trees;
1173+
revs->tag_objects = tmp_tags;
1174+
1175+
reset_revision_walk();
1176+
clear_object_flags(UNINTERESTING);
1177+
1178+
/*
1179+
* Then add the boundary commit(s) as fill-in traversal tips.
1180+
*/
1181+
trace2_region_enter("pack-bitmap", "boundary-fill-in", the_repository);
1182+
for (i = 0; i < cb.boundary.nr; i++) {
1183+
struct object *obj = cb.boundary.objects[i].item;
1184+
if (bitmap_walk_contains(bitmap_git, cb.base, &obj->oid))
1185+
obj->flags |= SEEN;
1186+
else
1187+
add_pending_object(revs, obj, "");
1188+
}
1189+
if (revs->pending.nr)
1190+
cb.base = fill_in_bitmap(bitmap_git, revs, cb.base, NULL);
1191+
trace2_region_leave("pack-bitmap", "boundary-fill-in", the_repository);
1192+
1193+
cleanup:
1194+
object_array_clear(&cb.boundary);
1195+
revs->ignore_missing_links = 0;
1196+
1197+
return cb.base;
1198+
}
1199+
10461200
static struct bitmap *find_objects(struct bitmap_index *bitmap_git,
10471201
struct rev_info *revs,
10481202
struct object_list *roots,
@@ -1109,33 +1263,19 @@ static struct bitmap *find_objects(struct bitmap_index *bitmap_git,
11091263
}
11101264

11111265
if (needs_walk) {
1112-
struct include_data incdata;
1113-
struct bitmap_show_data show_data;
1114-
1115-
if (!base)
1116-
base = bitmap_new();
1117-
1118-
incdata.bitmap_git = bitmap_git;
1119-
incdata.base = base;
1120-
incdata.seen = seen;
1121-
1122-
revs->include_check = should_include;
1123-
revs->include_check_obj = should_include_obj;
1124-
revs->include_check_data = &incdata;
1125-
1126-
if (prepare_revision_walk(revs))
1127-
die(_("revision walk setup failed"));
1128-
1129-
show_data.bitmap_git = bitmap_git;
1130-
show_data.base = base;
1131-
1132-
traverse_commit_list(revs,
1133-
show_commit, show_object,
1134-
&show_data);
1135-
1136-
revs->include_check = NULL;
1137-
revs->include_check_obj = NULL;
1138-
revs->include_check_data = NULL;
1266+
/*
1267+
* This fill-in traversal may walk over some objects
1268+
* again, since we have already traversed in order to
1269+
* find the boundary.
1270+
*
1271+
* But this extra walk should be extremely cheap, since
1272+
* all commit objects are loaded into memory, and
1273+
* because we skip walking to parents that are
1274+
* UNINTERESTING, since it will be marked in the haves
1275+
* bitmap already (or it has an on-disk bitmap, since
1276+
* OR-ing it in covers all of its ancestors).
1277+
*/
1278+
base = fill_in_bitmap(bitmap_git, revs, base, seen);
11391279
}
11401280

11411281
return base;
@@ -1528,6 +1668,7 @@ struct bitmap_index *prepare_bitmap_walk(struct rev_info *revs,
15281668
int filter_provided_objects)
15291669
{
15301670
unsigned int i;
1671+
int use_boundary_traversal;
15311672

15321673
struct object_list *wants = NULL;
15331674
struct object_list *haves = NULL;
@@ -1578,13 +1719,21 @@ struct bitmap_index *prepare_bitmap_walk(struct rev_info *revs,
15781719
object_list_insert(object, &wants);
15791720
}
15801721

1581-
/*
1582-
* if we have a HAVES list, but none of those haves is contained
1583-
* in the packfile that has a bitmap, we don't have anything to
1584-
* optimize here
1585-
*/
1586-
if (haves && !in_bitmapped_pack(bitmap_git, haves))
1587-
goto cleanup;
1722+
use_boundary_traversal = git_env_bool(GIT_TEST_PACK_USE_BITMAP_BOUNDARY_TRAVERSAL, -1);
1723+
if (use_boundary_traversal < 0) {
1724+
prepare_repo_settings(revs->repo);
1725+
use_boundary_traversal = revs->repo->settings.pack_use_bitmap_boundary_traversal;
1726+
}
1727+
1728+
if (!use_boundary_traversal) {
1729+
/*
1730+
* if we have a HAVES list, but none of those haves is contained
1731+
* in the packfile that has a bitmap, we don't have anything to
1732+
* optimize here
1733+
*/
1734+
if (haves && !in_bitmapped_pack(bitmap_git, haves))
1735+
goto cleanup;
1736+
}
15881737

15891738
/* if we don't want anything, we're done here */
15901739
if (!wants)
@@ -1598,18 +1747,32 @@ struct bitmap_index *prepare_bitmap_walk(struct rev_info *revs,
15981747
if (load_bitmap(revs->repo, bitmap_git) < 0)
15991748
goto cleanup;
16001749

1601-
object_array_clear(&revs->pending);
1750+
if (!use_boundary_traversal)
1751+
object_array_clear(&revs->pending);
16021752

16031753
if (haves) {
1604-
revs->ignore_missing_links = 1;
1605-
haves_bitmap = find_objects(bitmap_git, revs, haves, NULL);
1606-
reset_revision_walk();
1607-
revs->ignore_missing_links = 0;
1754+
if (use_boundary_traversal) {
1755+
trace2_region_enter("pack-bitmap", "haves/boundary", the_repository);
1756+
haves_bitmap = find_boundary_objects(bitmap_git, revs, haves);
1757+
trace2_region_leave("pack-bitmap", "haves/boundary", the_repository);
1758+
} else {
1759+
trace2_region_enter("pack-bitmap", "haves/classic", the_repository);
1760+
revs->ignore_missing_links = 1;
1761+
haves_bitmap = find_objects(bitmap_git, revs, haves, NULL);
1762+
reset_revision_walk();
1763+
revs->ignore_missing_links = 0;
1764+
trace2_region_leave("pack-bitmap", "haves/classic", the_repository);
1765+
}
16081766

16091767
if (!haves_bitmap)
16101768
BUG("failed to perform bitmap walk");
16111769
}
16121770

1771+
if (use_boundary_traversal) {
1772+
object_array_clear(&revs->pending);
1773+
reset_revision_walk();
1774+
}
1775+
16131776
wants_bitmap = find_objects(bitmap_git, revs, wants, haves_bitmap);
16141777

16151778
if (!wants_bitmap)

pack-bitmap.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,10 @@ void traverse_bitmap_commit_list(struct bitmap_index *,
6262
void test_bitmap_walk(struct rev_info *revs);
6363
int test_bitmap_commits(struct repository *r);
6464
int test_bitmap_hashes(struct repository *r);
65+
66+
/*
 * Name of the test-only environment variable that, when set to a
 * boolean, overrides the `pack.useBitmapBoundaryTraversal` setting in
 * prepare_bitmap_walk().
 */
#define GIT_TEST_PACK_USE_BITMAP_BOUNDARY_TRAVERSAL \
67+
"GIT_TEST_PACK_USE_BITMAP_BOUNDARY_TRAVERSAL"
68+
6569
struct bitmap_index *prepare_bitmap_walk(struct rev_info *revs,
6670
int filter_provided_objects);
6771
uint32_t midx_preferred_pack(struct bitmap_index *bitmap_git);

repo-settings.c

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,8 +41,10 @@ void prepare_repo_settings(struct repository *r)
4141
repo_cfg_bool(r, "feature.experimental", &experimental, 0);
4242

4343
/* Defaults modified by feature.* */
44-
if (experimental)
44+
if (experimental) {
4545
r->settings.fetch_negotiation_algorithm = FETCH_NEGOTIATION_SKIPPING;
46+
r->settings.pack_use_bitmap_boundary_traversal = 1;
47+
}
4648
if (manyfiles) {
4749
r->settings.index_version = 4;
4850
r->settings.index_skip_hash = 1;
@@ -62,6 +64,9 @@ void prepare_repo_settings(struct repository *r)
6264
repo_cfg_bool(r, "index.sparse", &r->settings.sparse_index, 0);
6365
repo_cfg_bool(r, "index.skiphash", &r->settings.index_skip_hash, r->settings.index_skip_hash);
6466
repo_cfg_bool(r, "pack.readreverseindex", &r->settings.pack_read_reverse_index, 1);
67+
repo_cfg_bool(r, "pack.usebitmapboundarytraversal",
68+
&r->settings.pack_use_bitmap_boundary_traversal,
69+
r->settings.pack_use_bitmap_boundary_traversal);
6570

6671
/*
6772
* The GIT_TEST_MULTI_PACK_INDEX variable is special in that

repository.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ struct repo_settings {
3737
int command_requires_full_index;
3838
int sparse_index;
3939
int pack_read_reverse_index;
40+
int pack_use_bitmap_boundary_traversal;
4041

4142
struct fsmonitor_settings *fsmonitor; /* lazily loaded */
4243

t/README

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -442,6 +442,10 @@ GIT_TEST_INDEX_VERSION=<n> exercises the index read/write code path
442442
for the index version specified. Can be set to any valid version
443443
(currently 2, 3, or 4).
444444

445+
GIT_TEST_PACK_USE_BITMAP_BOUNDARY_TRAVERSAL=<boolean> if enabled will
446+
use the boundary-based bitmap traversal algorithm. See the documentation
447+
of `pack.useBitmapBoundaryTraversal` for more details.
448+
445449
GIT_TEST_PACK_SPARSE=<boolean> if disabled will default the pack-objects
446450
builtin to use the non-sparse object walk. This can still be overridden by
447451
the --sparse command-line argument.

0 commit comments

Comments
 (0)