Skip to content

Commit 8b2f8cb

Browse files
rscharfe authored and gitster committed
oidset: use khash
Reimplement oidset using khash.h in order to reduce its memory footprint
and make it faster.

Performance of a command that mainly checks for duplicate objects using
an oidset, with master and Clang 6.0.1:

  $ cmd="./git-cat-file --batch-all-objects --unordered --buffer --batch-check='%(objectname)'"

  $ /usr/bin/time $cmd >/dev/null
  0.22user 0.03system 0:00.25elapsed 99%CPU (0avgtext+0avgdata 48484maxresident)k
  0inputs+0outputs (0major+11204minor)pagefaults 0swaps

  $ hyperfine "$cmd"
  Benchmark #1: ./git-cat-file --batch-all-objects --unordered --buffer --batch-check='%(objectname)'
    Time (mean ± σ): 250.0 ms ± 6.0 ms [User: 225.9 ms, System: 23.6 ms]
    Range (min … max): 242.0 ms … 261.1 ms

And with this patch:

  $ /usr/bin/time $cmd >/dev/null
  0.14user 0.00system 0:00.15elapsed 100%CPU (0avgtext+0avgdata 41396maxresident)k
  0inputs+0outputs (0major+8318minor)pagefaults 0swaps

  $ hyperfine "$cmd"
  Benchmark #1: ./git-cat-file --batch-all-objects --unordered --buffer --batch-check='%(objectname)'
    Time (mean ± σ): 151.9 ms ± 4.9 ms [User: 130.5 ms, System: 21.2 ms]
    Range (min … max): 148.2 ms … 170.4 ms

Initial-patch-by: Jeff King <[email protected]>
Signed-off-by: Rene Scharfe <[email protected]>
Signed-off-by: Junio C Hamano <[email protected]>
1 parent 9249ca2 commit 8b2f8cb

File tree

2 files changed

+40
-30
lines changed

2 files changed

+40
-30
lines changed

oidset.c

Lines changed: 12 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -3,38 +3,28 @@
33

44
int oidset_contains(const struct oidset *set, const struct object_id *oid)
55
{
6-
if (!set->map.map.tablesize)
7-
return 0;
8-
return !!oidmap_get(&set->map, oid);
6+
khiter_t pos = kh_get_oid(&set->set, *oid);
7+
return pos != kh_end(&set->set);
98
}
109

1110
int oidset_insert(struct oidset *set, const struct object_id *oid)
1211
{
13-
struct oidmap_entry *entry;
14-
15-
if (!set->map.map.tablesize)
16-
oidmap_init(&set->map, 0);
17-
else if (oidset_contains(set, oid))
18-
return 1;
19-
20-
entry = xmalloc(sizeof(*entry));
21-
oidcpy(&entry->oid, oid);
22-
23-
oidmap_put(&set->map, entry);
24-
return 0;
12+
int added;
13+
kh_put_oid(&set->set, *oid, &added);
14+
return !added;
2515
}
2616

2717
int oidset_remove(struct oidset *set, const struct object_id *oid)
2818
{
29-
struct oidmap_entry *entry;
30-
31-
entry = oidmap_remove(&set->map, oid);
32-
free(entry);
33-
34-
return (entry != NULL);
19+
khiter_t pos = kh_get_oid(&set->set, *oid);
20+
if (pos == kh_end(&set->set))
21+
return 0;
22+
kh_del_oid(&set->set, pos);
23+
return 1;
3524
}
3625

3726
void oidset_clear(struct oidset *set)
3827
{
39-
oidmap_free(&set->map, 1);
28+
kh_release_oid(&set->set);
29+
oidset_init(set, 0);
4030
}

oidset.h

Lines changed: 28 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
#ifndef OIDSET_H
22
#define OIDSET_H
33

4-
#include "oidmap.h"
4+
#include "hashmap.h"
5+
#include "khash.h"
56

67
/**
78
* This API is similar to sha1-array, in that it maintains a set of object ids
@@ -15,19 +16,33 @@
1516
* table overhead.
1617
*/
1718

19+
static inline unsigned int oid_hash(struct object_id oid)
20+
{
21+
return sha1hash(oid.hash);
22+
}
23+
24+
static inline int oid_equal(struct object_id a, struct object_id b)
25+
{
26+
return oideq(&a, &b);
27+
}
28+
29+
KHASH_INIT(oid, struct object_id, int, 0, oid_hash, oid_equal)
30+
1831
/**
1932
* A single oidset; should be zero-initialized (or use OIDSET_INIT).
2033
*/
2134
struct oidset {
22-
struct oidmap map;
35+
kh_oid_t set;
2336
};
2437

25-
#define OIDSET_INIT { OIDMAP_INIT }
38+
#define OIDSET_INIT { { 0 } }
2639

2740

2841
static inline void oidset_init(struct oidset *set, size_t initial_size)
2942
{
30-
oidmap_init(&set->map, initial_size);
43+
memset(&set->set, 0, sizeof(set->set));
44+
if (initial_size)
45+
kh_resize_oid(&set->set, initial_size);
3146
}
3247

3348
/**
@@ -58,19 +73,24 @@ int oidset_remove(struct oidset *set, const struct object_id *oid);
5873
void oidset_clear(struct oidset *set);
5974

6075
struct oidset_iter {
61-
struct oidmap_iter m_iter;
76+
kh_oid_t *set;
77+
khiter_t iter;
6278
};
6379

6480
static inline void oidset_iter_init(struct oidset *set,
6581
struct oidset_iter *iter)
6682
{
67-
oidmap_iter_init(&set->map, &iter->m_iter);
83+
iter->set = &set->set;
84+
iter->iter = kh_begin(iter->set);
6885
}
6986

7087
static inline struct object_id *oidset_iter_next(struct oidset_iter *iter)
7188
{
72-
struct oidmap_entry *e = oidmap_iter_next(&iter->m_iter);
73-
return e ? &e->oid : NULL;
89+
for (; iter->iter != kh_end(iter->set); iter->iter++) {
90+
if (kh_exist(iter->set, iter->iter))
91+
return &kh_key(iter->set, iter->iter++);
92+
}
93+
return NULL;
7494
}
7595

7696
static inline struct object_id *oidset_iter_first(struct oidset *set,

0 commit comments

Comments
 (0)