Skip to content

Commit 6325da1

Browse files
peffgitster
authored andcommitted
builtin/pack-objects.c: rewrite honor-pack-keep logic
Now that we have find_kept_pack_entry(), we don't have to manually keep hunting through every pack to find a possible "kept" duplicate of the object. This should be faster, assuming only a portion of your total packs are actually kept. Note that we have to re-order the logic a bit here; we can deal with the disqualifying situations first (e.g., finding the object in a non-local pack with --local), then "kept" situation(s), and then just fall back to other "--local" conditions. Here are the results from p5303 (measurements again taken on the kernel): Test HEAD^ HEAD ----------------------------------------------------------------------------------------------- 5303.5: repack (1) 57.26(54.59+10.84) 57.34(54.66+10.88) +0.1% 5303.6: repack with kept (1) 57.33(54.80+10.51) 57.38(54.83+10.49) +0.1% 5303.11: repack (50) 71.54(88.57+4.84) 71.70(88.99+4.74) +0.2% 5303.12: repack with kept (50) 85.12(102.05+4.94) 72.58(89.61+4.78) -14.7% 5303.17: repack (1000) 216.87(490.79+14.57) 217.19(491.72+14.25) +0.1% 5303.18: repack with kept (1000) 665.63(938.87+15.76) 246.12(520.07+14.93) -63.0% and the --stdin-packs timings: 5303.7: repack with --stdin-packs (1) 0.01(0.01+0.00) 0.00(0.00+0.00) -100.0% 5303.13: repack with --stdin-packs (50) 3.53(12.07+0.24) 3.43(11.75+0.24) -2.8% 5303.19: repack with --stdin-packs (1000) 195.83(371.82+8.10) 130.50(307.15+7.66) -33.4% So our repack with an empty .keep pack is roughly as fast as one without a .keep pack up to 50 packs. But the --stdin-packs case scales a little better, too. Notably, it is faster than a repack of the same size and a kept pack. It looks at fewer objects, of course, but the penalty for looking at many packs isn't as costly. Signed-off-by: Jeff King <[email protected]> Signed-off-by: Taylor Blau <[email protected]> Signed-off-by: Junio C Hamano <[email protected]>
1 parent fbf20ae commit 6325da1

File tree

1 file changed

+76
-51
lines changed

1 file changed

+76
-51
lines changed

builtin/pack-objects.c

Lines changed: 76 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -1188,7 +1188,8 @@ static int have_duplicate_entry(const struct object_id *oid,
11881188
return 1;
11891189
}
11901190

1191-
static int want_found_object(int exclude, struct packed_git *p)
1191+
static int want_found_object(const struct object_id *oid, int exclude,
1192+
struct packed_git *p)
11921193
{
11931194
if (exclude)
11941195
return 1;
@@ -1204,27 +1205,82 @@ static int want_found_object(int exclude, struct packed_git *p)
12041205
* make sure no copy of this object appears in _any_ pack that makes us
12051206
* to omit the object, so we need to check all the packs.
12061207
*
1207-
* We can however first check whether these options can possible matter;
1208+
* We can however first check whether these options can possibly matter;
12081209
* if they do not matter we know we want the object in generated pack.
12091210
* Otherwise, we signal "-1" at the end to tell the caller that we do
12101211
* not know either way, and it needs to check more packs.
12111212
*/
1212-
if (!ignore_packed_keep_on_disk &&
1213-
!ignore_packed_keep_in_core &&
1214-
(!local || !have_non_local_packs))
1215-
return 1;
12161213

1214+
/*
1215+
* Objects in packs borrowed from elsewhere are discarded regardless of
1216+
* if they appear in other packs that weren't borrowed.
1217+
*/
12171218
if (local && !p->pack_local)
12181219
return 0;
1219-
if (p->pack_local &&
1220-
((ignore_packed_keep_on_disk && p->pack_keep) ||
1221-
(ignore_packed_keep_in_core && p->pack_keep_in_core)))
1222-
return 0;
1220+
1221+
/*
1222+
* Then handle .keep first, as we have a fast(er) path there.
1223+
*/
1224+
if (ignore_packed_keep_on_disk || ignore_packed_keep_in_core) {
1225+
/*
1226+
* Set the flags for the kept-pack cache to be the ones we want
1227+
* to ignore.
1228+
*
1229+
* That is, if we are ignoring objects in on-disk keep packs,
1230+
* then we want to search through the on-disk keep and ignore
1231+
* the in-core ones.
1232+
*/
1233+
unsigned flags = 0;
1234+
if (ignore_packed_keep_on_disk)
1235+
flags |= ON_DISK_KEEP_PACKS;
1236+
if (ignore_packed_keep_in_core)
1237+
flags |= IN_CORE_KEEP_PACKS;
1238+
1239+
if (ignore_packed_keep_on_disk && p->pack_keep)
1240+
return 0;
1241+
if (ignore_packed_keep_in_core && p->pack_keep_in_core)
1242+
return 0;
1243+
if (has_object_kept_pack(oid, flags))
1244+
return 0;
1245+
}
1246+
1247+
/*
1248+
* At this point we know definitively that either we don't care about
1249+
* keep-packs, or the object is not in one. Keep checking other
1250+
* conditions...
1251+
*/
1252+
if (!local || !have_non_local_packs)
1253+
return 1;
12231254

12241255
/* we don't know yet; keep looking for more packs */
12251256
return -1;
12261257
}
12271258

1259+
static int want_object_in_pack_one(struct packed_git *p,
1260+
const struct object_id *oid,
1261+
int exclude,
1262+
struct packed_git **found_pack,
1263+
off_t *found_offset)
1264+
{
1265+
off_t offset;
1266+
1267+
if (p == *found_pack)
1268+
offset = *found_offset;
1269+
else
1270+
offset = find_pack_entry_one(oid->hash, p);
1271+
1272+
if (offset) {
1273+
if (!*found_pack) {
1274+
if (!is_pack_valid(p))
1275+
return -1;
1276+
*found_offset = offset;
1277+
*found_pack = p;
1278+
}
1279+
return want_found_object(oid, exclude, p);
1280+
}
1281+
return -1;
1282+
}
1283+
12281284
/*
12291285
* Check whether we want the object in the pack (e.g., we do not want
12301286
* objects found in non-local stores if the "--local" option was used).
@@ -1252,59 +1308,28 @@ static int want_object_in_pack(const struct object_id *oid,
12521308
* are present we will determine the answer right now.
12531309
*/
12541310
if (*found_pack) {
1255-
want = want_found_object(exclude, *found_pack);
1311+
want = want_found_object(oid, exclude, *found_pack);
12561312
if (want != -1)
12571313
return want;
12581314
}
12591315

12601316
for (m = get_multi_pack_index(the_repository); m; m = m->next) {
12611317
struct pack_entry e;
12621318
if (fill_midx_entry(the_repository, oid, &e, m)) {
1263-
struct packed_git *p = e.p;
1264-
off_t offset;
1265-
1266-
if (p == *found_pack)
1267-
offset = *found_offset;
1268-
else
1269-
offset = find_pack_entry_one(oid->hash, p);
1270-
1271-
if (offset) {
1272-
if (!*found_pack) {
1273-
if (!is_pack_valid(p))
1274-
continue;
1275-
*found_offset = offset;
1276-
*found_pack = p;
1277-
}
1278-
want = want_found_object(exclude, p);
1279-
if (want != -1)
1280-
return want;
1281-
}
1319+
want = want_object_in_pack_one(e.p, oid, exclude, found_pack, found_offset);
1320+
if (want != -1)
1321+
return want;
12821322
}
12831323
}
12841324

12851325
list_for_each(pos, get_packed_git_mru(the_repository)) {
12861326
struct packed_git *p = list_entry(pos, struct packed_git, mru);
1287-
off_t offset;
1288-
1289-
if (p == *found_pack)
1290-
offset = *found_offset;
1291-
else
1292-
offset = find_pack_entry_one(oid->hash, p);
1293-
1294-
if (offset) {
1295-
if (!*found_pack) {
1296-
if (!is_pack_valid(p))
1297-
continue;
1298-
*found_offset = offset;
1299-
*found_pack = p;
1300-
}
1301-
want = want_found_object(exclude, p);
1302-
if (!exclude && want > 0)
1303-
list_move(&p->mru,
1304-
get_packed_git_mru(the_repository));
1305-
if (want != -1)
1306-
return want;
1307-
}
1327+
want = want_object_in_pack_one(p, oid, exclude, found_pack, found_offset);
1328+
if (!exclude && want > 0)
1329+
list_move(&p->mru,
1330+
get_packed_git_mru(the_repository));
1331+
if (want != -1)
1332+
return want;
13081333
}
13091334

13101335
if (uri_protocols.nr) {

0 commit comments

Comments
 (0)