Skip to content

Commit 3277e3f

Browse files
committed
Merge branch 'tb/multi-cruft-pack-refresh-fix' into jch
Certain "cruft" objects would have never been refreshed when there are multiple cruft packs in the repository, which has been corrected. * tb/multi-cruft-pack-refresh-fix: builtin/pack-objects.c: freshen objects from existing cruft packs builtin/repack.c: simplify cruft pack aggregation
2 parents 4738dd8 + 2dee3ae commit 3277e3f

File tree

5 files changed

+171
-96
lines changed

5 files changed

+171
-96
lines changed

builtin/pack-objects.c

Lines changed: 101 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -206,6 +206,7 @@ static int have_non_local_packs;
206206
static int incremental;
207207
static int ignore_packed_keep_on_disk;
208208
static int ignore_packed_keep_in_core;
209+
static int ignore_packed_keep_in_core_has_cruft;
209210
static int allow_ofs_delta;
210211
static struct pack_idx_option pack_idx_opts;
211212
static const char *base_name;
@@ -1503,8 +1504,60 @@ static int have_duplicate_entry(const struct object_id *oid,
15031504
return 1;
15041505
}
15051506

1507+
static int want_cruft_object_mtime(struct repository *r,
1508+
const struct object_id *oid,
1509+
unsigned flags, uint32_t mtime)
1510+
{
1511+
struct packed_git **cache;
1512+
1513+
for (cache = kept_pack_cache(r, flags); *cache; cache++) {
1514+
struct packed_git *p = *cache;
1515+
off_t ofs;
1516+
uint32_t candidate_mtime;
1517+
1518+
ofs = find_pack_entry_one(oid, p);
1519+
if (!ofs)
1520+
continue;
1521+
1522+
/*
1523+
* We have a copy of the object 'oid' in a non-cruft
1524+
* pack. We can avoid packing an additional copy
1525+
* regardless of what the existing copy's mtime is since
1526+
* it is outside of a cruft pack.
1527+
*/
1528+
if (!p->is_cruft)
1529+
return 0;
1530+
1531+
/*
1532+
* If we have a copy of the object 'oid' in a cruft
1533+
* pack, then either read the cruft pack's mtime for
1534+
* that object, or, if that can't be loaded, assume the
1535+
* pack's mtime itself.
1536+
*/
1537+
if (!load_pack_mtimes(p)) {
1538+
uint32_t pos;
1539+
if (offset_to_pack_pos(p, ofs, &pos) < 0)
1540+
continue;
1541+
candidate_mtime = nth_packed_mtime(p, pos);
1542+
} else {
1543+
candidate_mtime = p->mtime;
1544+
}
1545+
1546+
/*
1547+
* We have a surviving copy of the object in a cruft
1548+
* pack whose mtime is greater than or equal to the one
1549+
* we are considering. We can thus avoid packing an
1550+
* additional copy of that object.
1551+
*/
1552+
if (mtime <= candidate_mtime)
1553+
return 0;
1554+
}
1555+
1556+
return -1;
1557+
}
1558+
15061559
static int want_found_object(const struct object_id *oid, int exclude,
1507-
struct packed_git *p)
1560+
struct packed_git *p, uint32_t mtime)
15081561
{
15091562
if (exclude)
15101563
return 1;
@@ -1554,12 +1607,29 @@ static int want_found_object(const struct object_id *oid, int exclude,
15541607
if (ignore_packed_keep_in_core)
15551608
flags |= IN_CORE_KEEP_PACKS;
15561609

1557-
if (ignore_packed_keep_on_disk && p->pack_keep)
1558-
return 0;
1559-
if (ignore_packed_keep_in_core && p->pack_keep_in_core)
1560-
return 0;
1561-
if (has_object_kept_pack(p->repo, oid, flags))
1562-
return 0;
1610+
/*
1611+
* If the object is in a pack that we want to ignore, *and* we
1612+
* don't have any cruft packs that are being retained, we can
1613+
* abort quickly.
1614+
*/
1615+
if (!ignore_packed_keep_in_core_has_cruft) {
1616+
if (ignore_packed_keep_on_disk && p->pack_keep)
1617+
return 0;
1618+
if (ignore_packed_keep_in_core && p->pack_keep_in_core)
1619+
return 0;
1620+
if (has_object_kept_pack(p->repo, oid, flags))
1621+
return 0;
1622+
} else {
1623+
/*
1624+
* But if there is at least one cruft pack which
1625+
* is being kept, we only want to include the
1626+
* provided object if it has a strictly greater
1627+
* mtime than any existing cruft copy.
1628+
*/
1629+
if (!want_cruft_object_mtime(p->repo, oid, flags,
1630+
mtime))
1631+
return 0;
1632+
}
15631633
}
15641634

15651635
/*
@@ -1578,7 +1648,8 @@ static int want_object_in_pack_one(struct packed_git *p,
15781648
const struct object_id *oid,
15791649
int exclude,
15801650
struct packed_git **found_pack,
1581-
off_t *found_offset)
1651+
off_t *found_offset,
1652+
uint32_t found_mtime)
15821653
{
15831654
off_t offset;
15841655

@@ -1594,7 +1665,7 @@ static int want_object_in_pack_one(struct packed_git *p,
15941665
*found_offset = offset;
15951666
*found_pack = p;
15961667
}
1597-
return want_found_object(oid, exclude, p);
1668+
return want_found_object(oid, exclude, p, found_mtime);
15981669
}
15991670
return -1;
16001671
}
@@ -1608,10 +1679,11 @@ static int want_object_in_pack_one(struct packed_git *p,
16081679
* function finds if there is any pack that has the object and returns the pack
16091680
* and its offset in these variables.
16101681
*/
1611-
static int want_object_in_pack(const struct object_id *oid,
1612-
int exclude,
1613-
struct packed_git **found_pack,
1614-
off_t *found_offset)
1682+
static int want_object_in_pack_mtime(const struct object_id *oid,
1683+
int exclude,
1684+
struct packed_git **found_pack,
1685+
off_t *found_offset,
1686+
uint32_t found_mtime)
16151687
{
16161688
int want;
16171689
struct list_head *pos;
@@ -1626,7 +1698,8 @@ static int want_object_in_pack(const struct object_id *oid,
16261698
* are present we will determine the answer right now.
16271699
*/
16281700
if (*found_pack) {
1629-
want = want_found_object(oid, exclude, *found_pack);
1701+
want = want_found_object(oid, exclude, *found_pack,
1702+
found_mtime);
16301703
if (want != -1)
16311704
return want;
16321705

@@ -1637,15 +1710,15 @@ static int want_object_in_pack(const struct object_id *oid,
16371710
for (m = get_multi_pack_index(the_repository); m; m = m->next) {
16381711
struct pack_entry e;
16391712
if (fill_midx_entry(the_repository, oid, &e, m)) {
1640-
want = want_object_in_pack_one(e.p, oid, exclude, found_pack, found_offset);
1713+
want = want_object_in_pack_one(e.p, oid, exclude, found_pack, found_offset, found_mtime);
16411714
if (want != -1)
16421715
return want;
16431716
}
16441717
}
16451718

16461719
list_for_each(pos, get_packed_git_mru(the_repository)) {
16471720
struct packed_git *p = list_entry(pos, struct packed_git, mru);
1648-
want = want_object_in_pack_one(p, oid, exclude, found_pack, found_offset);
1721+
want = want_object_in_pack_one(p, oid, exclude, found_pack, found_offset, found_mtime);
16491722
if (!exclude && want > 0)
16501723
list_move(&p->mru,
16511724
get_packed_git_mru(the_repository));
@@ -1675,6 +1748,15 @@ static int want_object_in_pack(const struct object_id *oid,
16751748
return 1;
16761749
}
16771750

1751+
static inline int want_object_in_pack(const struct object_id *oid,
1752+
int exclude,
1753+
struct packed_git **found_pack,
1754+
off_t *found_offset)
1755+
{
1756+
return want_object_in_pack_mtime(oid, exclude, found_pack, found_offset,
1757+
0);
1758+
}
1759+
16781760
static struct object_entry *create_object_entry(const struct object_id *oid,
16791761
enum object_type type,
16801762
uint32_t hash,
@@ -3607,7 +3689,7 @@ static void add_cruft_object_entry(const struct object_id *oid, enum object_type
36073689
entry->no_try_delta = no_try_delta(name);
36083690
}
36093691
} else {
3610-
if (!want_object_in_pack(oid, 0, &pack, &offset))
3692+
if (!want_object_in_pack_mtime(oid, 0, &pack, &offset, mtime))
36113693
return;
36123694
if (!pack && type == OBJ_BLOB && !has_loose_object(oid)) {
36133695
/*
@@ -3681,6 +3763,8 @@ static void mark_pack_kept_in_core(struct string_list *packs, unsigned keep)
36813763
struct packed_git *p = item->util;
36823764
if (!p)
36833765
die(_("could not find pack '%s'"), item->string);
3766+
if (p->is_cruft && keep)
3767+
ignore_packed_keep_in_core_has_cruft = 1;
36843768
p->pack_keep_in_core = keep;
36853769
}
36863770
}

builtin/repack.c

Lines changed: 2 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -1022,29 +1022,13 @@ static int write_filtered_pack(const struct pack_objects_args *args,
10221022
return finish_pack_objects_cmd(&cmd, names, local);
10231023
}
10241024

1025-
static int existing_cruft_pack_cmp(const void *va, const void *vb)
1026-
{
1027-
struct packed_git *a = *(struct packed_git **)va;
1028-
struct packed_git *b = *(struct packed_git **)vb;
1029-
1030-
if (a->pack_size < b->pack_size)
1031-
return -1;
1032-
if (a->pack_size > b->pack_size)
1033-
return 1;
1034-
return 0;
1035-
}
1036-
10371025
static void collapse_small_cruft_packs(FILE *in, size_t max_size,
10381026
struct existing_packs *existing)
10391027
{
1040-
struct packed_git **existing_cruft, *p;
1028+
struct packed_git *p;
10411029
struct strbuf buf = STRBUF_INIT;
1042-
size_t total_size = 0;
1043-
size_t existing_cruft_nr = 0;
10441030
size_t i;
10451031

1046-
ALLOC_ARRAY(existing_cruft, existing->cruft_packs.nr);
1047-
10481032
for (p = get_all_packs(the_repository); p; p = p->next) {
10491033
if (!(p->is_cruft && p->pack_local))
10501034
continue;
@@ -1056,24 +1040,7 @@ static void collapse_small_cruft_packs(FILE *in, size_t max_size,
10561040
if (!string_list_has_string(&existing->cruft_packs, buf.buf))
10571041
continue;
10581042

1059-
if (existing_cruft_nr >= existing->cruft_packs.nr)
1060-
BUG("too many cruft packs (found %"PRIuMAX", but knew "
1061-
"of %"PRIuMAX")",
1062-
(uintmax_t)existing_cruft_nr + 1,
1063-
(uintmax_t)existing->cruft_packs.nr);
1064-
existing_cruft[existing_cruft_nr++] = p;
1065-
}
1066-
1067-
QSORT(existing_cruft, existing_cruft_nr, existing_cruft_pack_cmp);
1068-
1069-
for (i = 0; i < existing_cruft_nr; i++) {
1070-
size_t proposed;
1071-
1072-
p = existing_cruft[i];
1073-
proposed = st_add(total_size, p->pack_size);
1074-
1075-
if (proposed <= max_size) {
1076-
total_size = proposed;
1043+
if (p->pack_size < max_size) {
10771044
fprintf(in, "-%s\n", pack_basename(p));
10781045
} else {
10791046
retain_cruft_pack(existing, p);
@@ -1086,7 +1053,6 @@ static void collapse_small_cruft_packs(FILE *in, size_t max_size,
10861053
existing->non_kept_packs.items[i].string);
10871054

10881055
strbuf_release(&buf);
1089-
free(existing_cruft);
10901056
}
10911057

10921058
static int write_cruft_pack(const struct pack_objects_args *args,

packfile.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
#include "commit-graph.h"
2525
#include "pack-revindex.h"
2626
#include "promisor-remote.h"
27+
#include "pack-mtimes.h"
2728

2829
char *odb_pack_name(struct repository *r, struct strbuf *buf,
2930
const unsigned char *hash, const char *ext)
@@ -2107,7 +2108,7 @@ static void maybe_invalidate_kept_pack_cache(struct repository *r,
21072108
r->objects->kept_pack_cache.flags = 0;
21082109
}
21092110

2110-
static struct packed_git **kept_pack_cache(struct repository *r, unsigned flags)
2111+
struct packed_git **kept_pack_cache(struct repository *r, unsigned flags)
21112112
{
21122113
maybe_invalidate_kept_pack_cache(r, flags);
21132114

packfile.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -197,6 +197,8 @@ int has_object_pack(struct repository *r, const struct object_id *oid);
197197
int has_object_kept_pack(struct repository *r, const struct object_id *oid,
198198
unsigned flags);
199199

200+
struct packed_git **kept_pack_cache(struct repository *r, unsigned flags);
201+
200202
/*
201203
* Return 1 if an object in a promisor packfile is or refers to the given
202204
* object, 0 otherwise.

0 commit comments

Comments
 (0)