Skip to content

Commit 27fe152

Browse files
committed
Merge branch 'tb/multi-cruft-pack-refresh-fix'
Certain "cruft" objects would have never been refreshed when there are multiple cruft packs in the repository, which has been corrected. * tb/multi-cruft-pack-refresh-fix: builtin/pack-objects.c: freshen objects from existing cruft packs
2 parents f3db666 + 08f612b commit 27fe152

File tree

4 files changed

+171
-18
lines changed

4 files changed

+171
-18
lines changed

builtin/pack-objects.c

Lines changed: 101 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -206,6 +206,7 @@ static int have_non_local_packs;
206206
static int incremental;
207207
static int ignore_packed_keep_on_disk;
208208
static int ignore_packed_keep_in_core;
209+
static int ignore_packed_keep_in_core_has_cruft;
209210
static int allow_ofs_delta;
210211
static struct pack_idx_option pack_idx_opts;
211212
static const char *base_name;
@@ -1502,8 +1503,60 @@ static int have_duplicate_entry(const struct object_id *oid,
15021503
return 1;
15031504
}
15041505

1506+
static int want_cruft_object_mtime(struct repository *r,
1507+
const struct object_id *oid,
1508+
unsigned flags, uint32_t mtime)
1509+
{
1510+
struct packed_git **cache;
1511+
1512+
for (cache = kept_pack_cache(r, flags); *cache; cache++) {
1513+
struct packed_git *p = *cache;
1514+
off_t ofs;
1515+
uint32_t candidate_mtime;
1516+
1517+
ofs = find_pack_entry_one(oid, p);
1518+
if (!ofs)
1519+
continue;
1520+
1521+
/*
1522+
* We have a copy of the object 'oid' in a non-cruft
1523+
* pack. We can avoid packing an additional copy
1524+
* regardless of what the existing copy's mtime is since
1525+
* it is outside of a cruft pack.
1526+
*/
1527+
if (!p->is_cruft)
1528+
return 0;
1529+
1530+
/*
1531+
* If we have a copy of the object 'oid' in a cruft
1532+
* pack, then either read the cruft pack's mtime for
1533+
* that object, or, if that can't be loaded, assume the
1534+
* pack's mtime itself.
1535+
*/
1536+
if (!load_pack_mtimes(p)) {
1537+
uint32_t pos;
1538+
if (offset_to_pack_pos(p, ofs, &pos) < 0)
1539+
continue;
1540+
candidate_mtime = nth_packed_mtime(p, pos);
1541+
} else {
1542+
candidate_mtime = p->mtime;
1543+
}
1544+
1545+
/*
1546+
* We have a surviving copy of the object in a cruft
1547+
* pack whose mtime is greater than or equal to the one
1548+
* we are considering. We can thus avoid packing an
1549+
* additional copy of that object.
1550+
*/
1551+
if (mtime <= candidate_mtime)
1552+
return 0;
1553+
}
1554+
1555+
return -1;
1556+
}
1557+
15051558
static int want_found_object(const struct object_id *oid, int exclude,
1506-
struct packed_git *p)
1559+
struct packed_git *p, uint32_t mtime)
15071560
{
15081561
if (exclude)
15091562
return 1;
@@ -1553,12 +1606,29 @@ static int want_found_object(const struct object_id *oid, int exclude,
15531606
if (ignore_packed_keep_in_core)
15541607
flags |= IN_CORE_KEEP_PACKS;
15551608

1556-
if (ignore_packed_keep_on_disk && p->pack_keep)
1557-
return 0;
1558-
if (ignore_packed_keep_in_core && p->pack_keep_in_core)
1559-
return 0;
1560-
if (has_object_kept_pack(p->repo, oid, flags))
1561-
return 0;
1609+
/*
1610+
* If the object is in a pack that we want to ignore, *and* we
1611+
* don't have any cruft packs that are being retained, we can
1612+
* abort quickly.
1613+
*/
1614+
if (!ignore_packed_keep_in_core_has_cruft) {
1615+
if (ignore_packed_keep_on_disk && p->pack_keep)
1616+
return 0;
1617+
if (ignore_packed_keep_in_core && p->pack_keep_in_core)
1618+
return 0;
1619+
if (has_object_kept_pack(p->repo, oid, flags))
1620+
return 0;
1621+
} else {
1622+
/*
1623+
* But if there is at least one cruft pack which
1624+
* is being kept, we only want to include the
1625+
* provided object if it has a strictly greater
1626+
* mtime than any existing cruft copy.
1627+
*/
1628+
if (!want_cruft_object_mtime(p->repo, oid, flags,
1629+
mtime))
1630+
return 0;
1631+
}
15621632
}
15631633

15641634
/*
@@ -1577,7 +1647,8 @@ static int want_object_in_pack_one(struct packed_git *p,
15771647
const struct object_id *oid,
15781648
int exclude,
15791649
struct packed_git **found_pack,
1580-
off_t *found_offset)
1650+
off_t *found_offset,
1651+
uint32_t found_mtime)
15811652
{
15821653
off_t offset;
15831654

@@ -1593,7 +1664,7 @@ static int want_object_in_pack_one(struct packed_git *p,
15931664
*found_offset = offset;
15941665
*found_pack = p;
15951666
}
1596-
return want_found_object(oid, exclude, p);
1667+
return want_found_object(oid, exclude, p, found_mtime);
15971668
}
15981669
return -1;
15991670
}
@@ -1607,10 +1678,11 @@ static int want_object_in_pack_one(struct packed_git *p,
16071678
* function finds if there is any pack that has the object and returns the pack
16081679
* and its offset in these variables.
16091680
*/
1610-
static int want_object_in_pack(const struct object_id *oid,
1611-
int exclude,
1612-
struct packed_git **found_pack,
1613-
off_t *found_offset)
1681+
static int want_object_in_pack_mtime(const struct object_id *oid,
1682+
int exclude,
1683+
struct packed_git **found_pack,
1684+
off_t *found_offset,
1685+
uint32_t found_mtime)
16141686
{
16151687
int want;
16161688
struct list_head *pos;
@@ -1625,7 +1697,8 @@ static int want_object_in_pack(const struct object_id *oid,
16251697
* are present we will determine the answer right now.
16261698
*/
16271699
if (*found_pack) {
1628-
want = want_found_object(oid, exclude, *found_pack);
1700+
want = want_found_object(oid, exclude, *found_pack,
1701+
found_mtime);
16291702
if (want != -1)
16301703
return want;
16311704

@@ -1636,15 +1709,15 @@ static int want_object_in_pack(const struct object_id *oid,
16361709
for (m = get_multi_pack_index(the_repository); m; m = m->next) {
16371710
struct pack_entry e;
16381711
if (fill_midx_entry(the_repository, oid, &e, m)) {
1639-
want = want_object_in_pack_one(e.p, oid, exclude, found_pack, found_offset);
1712+
want = want_object_in_pack_one(e.p, oid, exclude, found_pack, found_offset, found_mtime);
16401713
if (want != -1)
16411714
return want;
16421715
}
16431716
}
16441717

16451718
list_for_each(pos, get_packed_git_mru(the_repository)) {
16461719
struct packed_git *p = list_entry(pos, struct packed_git, mru);
1647-
want = want_object_in_pack_one(p, oid, exclude, found_pack, found_offset);
1720+
want = want_object_in_pack_one(p, oid, exclude, found_pack, found_offset, found_mtime);
16481721
if (!exclude && want > 0)
16491722
list_move(&p->mru,
16501723
get_packed_git_mru(the_repository));
@@ -1674,6 +1747,15 @@ static int want_object_in_pack(const struct object_id *oid,
16741747
return 1;
16751748
}
16761749

1750+
static inline int want_object_in_pack(const struct object_id *oid,
1751+
int exclude,
1752+
struct packed_git **found_pack,
1753+
off_t *found_offset)
1754+
{
1755+
return want_object_in_pack_mtime(oid, exclude, found_pack, found_offset,
1756+
0);
1757+
}
1758+
16771759
static struct object_entry *create_object_entry(const struct object_id *oid,
16781760
enum object_type type,
16791761
uint32_t hash,
@@ -3606,7 +3688,7 @@ static void add_cruft_object_entry(const struct object_id *oid, enum object_type
36063688
entry->no_try_delta = no_try_delta(name);
36073689
}
36083690
} else {
3609-
if (!want_object_in_pack(oid, 0, &pack, &offset))
3691+
if (!want_object_in_pack_mtime(oid, 0, &pack, &offset, mtime))
36103692
return;
36113693
if (!pack && type == OBJ_BLOB && !has_loose_object(oid)) {
36123694
/*
@@ -3680,6 +3762,8 @@ static void mark_pack_kept_in_core(struct string_list *packs, unsigned keep)
36803762
struct packed_git *p = item->util;
36813763
if (!p)
36823764
die(_("could not find pack '%s'"), item->string);
3765+
if (p->is_cruft && keep)
3766+
ignore_packed_keep_in_core_has_cruft = 1;
36833767
p->pack_keep_in_core = keep;
36843768
}
36853769
}

packfile.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
#include "commit-graph.h"
2525
#include "pack-revindex.h"
2626
#include "promisor-remote.h"
27+
#include "pack-mtimes.h"
2728

2829
char *odb_pack_name(struct repository *r, struct strbuf *buf,
2930
const unsigned char *hash, const char *ext)
@@ -2107,7 +2108,7 @@ static void maybe_invalidate_kept_pack_cache(struct repository *r,
21072108
r->objects->kept_pack_cache.flags = 0;
21082109
}
21092110

2110-
static struct packed_git **kept_pack_cache(struct repository *r, unsigned flags)
2111+
struct packed_git **kept_pack_cache(struct repository *r, unsigned flags)
21112112
{
21122113
maybe_invalidate_kept_pack_cache(r, flags);
21132114

packfile.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -197,6 +197,8 @@ int has_object_pack(struct repository *r, const struct object_id *oid);
197197
int has_object_kept_pack(struct repository *r, const struct object_id *oid,
198198
unsigned flags);
199199

200+
struct packed_git **kept_pack_cache(struct repository *r, unsigned flags);
201+
200202
/*
201203
* Return 1 if an object in a promisor packfile is or refers to the given
202204
* object, 0 otherwise.

t/t7704-repack-cruft.sh

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -304,6 +304,72 @@ test_expect_success '--max-cruft-size with freshened objects (packed)' '
304304
)
305305
'
306306

307+
test_expect_success '--max-cruft-size with freshened objects (previously cruft)' '
308+
repo="max-cruft-size-threshold" &&
309+
310+
test_when_finished "rm -fr $repo" &&
311+
git init "$repo" &&
312+
(
313+
cd "$repo" &&
314+
315+
test_commit base &&
316+
foo="$(generate_random_blob foo $((2*1024*1024)))" &&
317+
bar="$(generate_random_blob bar $((2*1024*1024)))" &&
318+
baz="$(generate_random_blob baz $((2*1024*1024)))" &&
319+
320+
test-tool chmtime --get -100000 \
321+
"$objdir/$(test_oid_to_path "$foo")" >foo.old &&
322+
test-tool chmtime --get -100000 \
323+
"$objdir/$(test_oid_to_path "$bar")" >bar.old &&
324+
test-tool chmtime --get -100000 \
325+
"$objdir/$(test_oid_to_path "$baz")" >baz.old &&
326+
327+
git repack --cruft -d &&
328+
329+
# Make an identical copy of foo stored in a pack with a more
330+
# recent mtime.
331+
foo="$(generate_random_blob foo $((2*1024*1024)))" &&
332+
foo_pack="$(echo "$foo" | git pack-objects $packdir/pack)" &&
333+
test-tool chmtime --get -100 \
334+
"$packdir/pack-$foo_pack.pack" >foo.new &&
335+
git prune-packed &&
336+
337+
# Make a loose copy of bar, also with a more recent mtime.
338+
bar="$(generate_random_blob bar $((2*1024*1024)))" &&
339+
test-tool chmtime --get -100 \
340+
"$objdir/$(test_oid_to_path "$bar")" >bar.new &&
341+
342+
# Make a new cruft object $quux to ensure we do not
343+
# generate an identical pack to the existing cruft
344+
# pack.
345+
quux="$(generate_random_blob quux $((1024)))" &&
346+
test-tool chmtime --get -100 \
347+
"$objdir/$(test_oid_to_path "$quux")" >quux.new &&
348+
349+
git repack --cruft --max-cruft-size=3M -d &&
350+
351+
for p in $packdir/pack-*.mtimes
352+
do
353+
test-tool pack-mtimes "$(basename "$p")" || return 1
354+
done >actual.raw &&
355+
sort actual.raw >actual &&
356+
357+
# Among the set of all cruft packs, we should see both
358+
# mtimes for object $foo and $bar, as well as the
359+
# single new copy of $baz.
360+
sort >expect <<-EOF &&
361+
$foo $(cat foo.old)
362+
$foo $(cat foo.new)
363+
$bar $(cat bar.old)
364+
$bar $(cat bar.new)
365+
$baz $(cat baz.old)
366+
$quux $(cat quux.new)
367+
EOF
368+
369+
test_cmp expect actual
370+
)
371+
'
372+
307373
test_expect_success '--max-cruft-size with pruning' '
308374
git init max-cruft-size-prune &&
309375
(

0 commit comments

Comments
 (0)