Skip to content

Commit 6d831b8

Browse files
committed
Merge branch 'cs/store-packfiles-in-hashmap'
In a repository with many packfiles, the cost of the procedure that avoids registering the same packfile twice was unnecessarily high because it used an inefficient search algorithm; this has been corrected. * cs/store-packfiles-in-hashmap: packfile.c: speed up loading lots of packfiles
2 parents 3beff38 + ec48540 commit 6d831b8

File tree

4 files changed

+52
-9
lines changed

4 files changed

+52
-9
lines changed

object-store.h

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@ struct oid_array *odb_loose_cache(struct object_directory *odb,
6060
void odb_clear_loose_cache(struct object_directory *odb);
6161

6262
struct packed_git {
63+
struct hashmap_entry packmap_ent;
6364
struct packed_git *next;
6465
struct list_head mru;
6566
struct pack_window *windows;
@@ -88,6 +89,20 @@ struct packed_git {
8889

8990
struct multi_pack_index;
9091

92+
/*
 * Comparison callback for the pack_map hashmap (it is the function handed
 * to hashmap_init() for that map).
 *
 * "entry" is expected to be the packmap_ent member of a struct packed_git;
 * its pack_name is the left-hand side of the comparison.  The right-hand
 * side is "keydata" (treated as a NUL-terminated string) when it is
 * non-NULL, and otherwise the pack_name of the struct packed_git that
 * contains "entry2".
 *
 * Note that the container of "entry2" is only dereferenced when keydata is
 * NULL, so a lookup that supplies a key (as prepare_pack() does with a
 * stack-allocated hashmap_entry) does not need a full packed_git behind
 * "entry2".
 *
 * Returns the strcmp() result: 0 when the names are equal, non-zero
 * otherwise.  "unused_cmp_data" is ignored.
 */
static inline int pack_map_entry_cmp(const void *unused_cmp_data,
93+
const struct hashmap_entry *entry,
94+
const struct hashmap_entry *entry2,
95+
const void *keydata)
96+
{
97+
const char *key = keydata;
98+
const struct packed_git *pg1, *pg2;
99+
100+
pg1 = container_of(entry, const struct packed_git, packmap_ent);
101+
pg2 = container_of(entry2, const struct packed_git, packmap_ent);
102+
103+
return strcmp(pg1->pack_name, key ? key : pg2->pack_name);
104+
}
105+
91106
struct raw_object_store {
92107
/*
93108
* Set of all object directories; the main directory is first (and
@@ -131,6 +146,12 @@ struct raw_object_store {
131146
/* A most-recently-used ordered version of the packed_git list. */
132147
struct list_head packed_git_mru;
133148

149+
/*
150+
* A map of packfiles to packed_git structs for tracking which
151+
* packs have been loaded already.
152+
*/
153+
struct hashmap pack_map;
154+
134155
/*
135156
* A fast, rough count of the number of objects in the repository.
136157
* These two fields are not meant for direct access. Use

object.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -479,6 +479,7 @@ struct raw_object_store *raw_object_store_new(void)
479479

480480
memset(o, 0, sizeof(*o));
481481
INIT_LIST_HEAD(&o->packed_git_mru);
482+
hashmap_init(&o->pack_map, pack_map_entry_cmp, NULL, 0);
482483
return o;
483484
}
484485

@@ -518,6 +519,8 @@ void raw_object_store_clear(struct raw_object_store *o)
518519
INIT_LIST_HEAD(&o->packed_git_mru);
519520
close_object_store(o);
520521
o->packed_git = NULL;
522+
523+
hashmap_free(&o->pack_map);
521524
}
522525

523526
void parsed_object_pool_clear(struct parsed_object_pool *o)

packfile.c

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -757,6 +757,9 @@ void install_packed_git(struct repository *r, struct packed_git *pack)
757757

758758
pack->next = r->objects->packed_git;
759759
r->objects->packed_git = pack;
760+
761+
hashmap_entry_init(&pack->packmap_ent, strhash(pack->pack_name));
762+
hashmap_add(&r->objects->pack_map, &pack->packmap_ent);
760763
}
761764

762765
void (*report_garbage)(unsigned seen_bits, const char *path);
@@ -856,20 +859,18 @@ static void prepare_pack(const char *full_name, size_t full_name_len,
856859

857860
if (strip_suffix_mem(full_name, &base_len, ".idx") &&
858861
!(data->m && midx_contains_pack(data->m, file_name))) {
859-
/* Don't reopen a pack we already have. */
860-
for (p = data->r->objects->packed_git; p; p = p->next) {
861-
size_t len;
862-
if (strip_suffix(p->pack_name, ".pack", &len) &&
863-
len == base_len &&
864-
!memcmp(p->pack_name, full_name, len))
865-
break;
866-
}
862+
struct hashmap_entry hent;
863+
char *pack_name = xstrfmt("%.*s.pack", (int)base_len, full_name);
864+
unsigned int hash = strhash(pack_name);
865+
hashmap_entry_init(&hent, hash);
867866

868-
if (!p) {
867+
/* Don't reopen a pack we already have. */
868+
if (!hashmap_get(&data->r->objects->pack_map, &hent, pack_name)) {
869869
p = add_packed_git(full_name, full_name_len, data->local);
870870
if (p)
871871
install_packed_git(data->r, p);
872872
}
873+
free(pack_name);
873874
}
874875

875876
if (!report_garbage)

t/perf/p5303-many-packs.sh

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,4 +85,22 @@ do
8585
'
8686
done
8787

88+
# Measure pack loading with 10,000 packs.
89+
test_expect_success 'generate lots of packs' '
90+
for i in $(test_seq 10000); do
91+
echo "blob"
92+
echo "data <<EOF"
93+
echo "blob $i"
94+
echo "EOF"
95+
echo "checkpoint"
96+
done |
97+
git -c fastimport.unpackLimit=0 fast-import
98+
'
99+
100+
# The purpose of this test is to evaluate load time for a large number
101+
# of packs while doing as little other work as possible.
102+
test_perf "load 10,000 packs" '
103+
git rev-parse --verify "HEAD^{commit}"
104+
'
105+
88106
test_done

0 commit comments

Comments
 (0)