Skip to content

Commit e5cc59c

Browse files
committed
Merge branch 'ew/many-alternate-optim'
Optimization for repositories with many alternate object stores. * ew/many-alternate-optim: oidtree: a crit-bit tree for odb_loose_cache oidcpy_with_padding: constify `src' arg make object_directory.loose_objects_subdir_seen a bitmap avoid strlen via strbuf_addstr in link_alt_odb_entry speed up alt_odb_usable() with many alternates
2 parents 14793a4 + 92d8ed8 commit e5cc59c

16 files changed

+534
-51
lines changed

Makefile

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -726,6 +726,7 @@ TEST_BUILTINS_OBJS += test-mergesort.o
726726
TEST_BUILTINS_OBJS += test-mktemp.o
727727
TEST_BUILTINS_OBJS += test-oid-array.o
728728
TEST_BUILTINS_OBJS += test-oidmap.o
729+
TEST_BUILTINS_OBJS += test-oidtree.o
729730
TEST_BUILTINS_OBJS += test-online-cpus.o
730731
TEST_BUILTINS_OBJS += test-parse-options.o
731732
TEST_BUILTINS_OBJS += test-parse-pathspec-file.o
@@ -850,6 +851,7 @@ LIB_OBJS += branch.o
850851
LIB_OBJS += bulk-checkin.o
851852
LIB_OBJS += bundle.o
852853
LIB_OBJS += cache-tree.o
854+
LIB_OBJS += cbtree.o
853855
LIB_OBJS += chdir-notify.o
854856
LIB_OBJS += checkout.o
855857
LIB_OBJS += chunk-format.o
@@ -945,6 +947,7 @@ LIB_OBJS += object.o
945947
LIB_OBJS += oid-array.o
946948
LIB_OBJS += oidmap.o
947949
LIB_OBJS += oidset.o
950+
LIB_OBJS += oidtree.o
948951
LIB_OBJS += pack-bitmap-write.o
949952
LIB_OBJS += pack-bitmap.o
950953
LIB_OBJS += pack-check.o

cbtree.c

Lines changed: 167 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,167 @@
1+
/*
2+
* crit-bit tree implementation, does no allocations internally
3+
* For more information on crit-bit trees: https://cr.yp.to/critbit.html
4+
* Based on Adam Langley's adaptation of Dan Bernstein's public domain code
5+
* git clone https://github.com/agl/critbit.git
6+
*/
7+
#include "cbtree.h"
8+
9+
/*
 * Convert a tagged child pointer back into a usable node address by
 * subtracting 1.  Callers in this file only pass pointers whose low bit
 * is set (they test "1 & (uintptr_t)p" first).
 */
static struct cb_node *cb_node_of(const void *p)
10+
{
11+
return (struct cb_node *)((uintptr_t)p - 1);
12+
}
13+
14+
/*
 * Locate the best match for the "klen"-byte key "k", starting at "p".
 * Does not do a final comparison: the caller must still compare the
 * returned node's key against "k".
 * Follows tagged (low bit set) pointers until an untagged one is reached
 * and returns it; an untagged "p", including NULL, is returned as-is.
 */
15+
static struct cb_node *cb_internal_best_match(struct cb_node *p,
16+
const uint8_t *k, size_t klen)
17+
{
18+
while (1 & (uintptr_t)p) {
19+
struct cb_node *q = cb_node_of(p);
20+
/* key bytes at or beyond klen are treated as 0 */
uint8_t c = q->byte < klen ? k[q->byte] : 0;
21+
/* 1 only when (otherbits | c) == 0xff, otherwise 0 */
size_t direction = (1 + (q->otherbits | c)) >> 8;
22+
23+
p = q->child[direction];
24+
}
25+
return p;
26+
}
27+
28+
/*
 * Insert the caller-allocated "node" into "t".  The first "klen" bytes of
 * node->k must already hold the key; they are read here, never written.
 *
 * Returns NULL if successful, existing cb_node if duplicate.  In the
 * duplicate case neither "node" nor the tree has been modified.
 */
29+
struct cb_node *cb_insert(struct cb_tree *t, struct cb_node *node, size_t klen)
30+
{
31+
size_t newbyte, newotherbits;
32+
uint8_t c;
33+
int newdirection;
34+
struct cb_node **wherep, *p;
35+
36+
/* the low pointer bit is used as a tag below, so it must be clear */
assert(!((uintptr_t)node & 1)); /* allocations must be aligned */
37+
38+
if (!t->root) { /* insert into empty tree */
39+
t->root = node;
40+
return NULL; /* success */
41+
}
42+
43+
/* see if a node already exists */
44+
p = cb_internal_best_match(t->root, node->k, klen);
45+
46+
/* find first differing byte */
47+
for (newbyte = 0; newbyte < klen; newbyte++) {
48+
if (p->k[newbyte] != node->k[newbyte])
49+
goto different_byte_found;
50+
}
51+
return p; /* element exists, let user deal with it */
52+
53+
different_byte_found:
54+
/*
 * XOR yields the differing bits; the three shift-or steps set every bit
 * below the highest differing one; "x & ~(x >> 1)" keeps only that
 * highest bit; "^ 255" inverts the byte so that every bit except the
 * critical one is set.
 */
newotherbits = p->k[newbyte] ^ node->k[newbyte];
55+
newotherbits |= newotherbits >> 1;
56+
newotherbits |= newotherbits >> 2;
57+
newotherbits |= newotherbits >> 4;
58+
newotherbits = (newotherbits & ~(newotherbits >> 1)) ^ 255;
59+
/* newdirection is the side on which the existing best match "p" falls */
c = p->k[newbyte];
60+
newdirection = (1 + (newotherbits | c)) >> 8;
61+
62+
node->byte = newbyte;
63+
node->otherbits = newotherbits;
64+
/* untagged self pointer: "node" is also the leaf on the opposite side */
node->child[1 - newdirection] = node;
65+
66+
/* find a place to insert it */
67+
wherep = &t->root;
68+
for (;;) {
69+
struct cb_node *q;
70+
size_t direction;
71+
72+
p = *wherep;
73+
/* stop at an untagged (leaf) pointer */
if (!(1 & (uintptr_t)p))
74+
break;
75+
q = cb_node_of(p);
76+
/* stop before a branch that tests a later byte ... */
if (q->byte > newbyte)
77+
break;
78+
/* ... or the same byte with a numerically larger otherbits mask */
if (q->byte == newbyte && q->otherbits > newotherbits)
79+
break;
80+
c = q->byte < klen ? node->k[q->byte] : 0;
81+
direction = (1 + (q->otherbits | c)) >> 8;
82+
wherep = q->child + direction;
83+
}
84+
85+
/* splice: the displaced link becomes our child, we take its place (tagged) */
node->child[newdirection] = *wherep;
86+
*wherep = (struct cb_node *)(1 + (uintptr_t)node);
87+
88+
return NULL; /* success */
89+
}
90+
91+
/*
 * Look up the "klen"-byte key "k" in "t".
 * Returns the best-match node when its first "klen" key bytes compare
 * equal to "k" with memcmp(); returns NULL otherwise, including when the
 * tree is empty.
 */
struct cb_node *cb_lookup(struct cb_tree *t, const uint8_t *k, size_t klen)
92+
{
93+
struct cb_node *p = cb_internal_best_match(t->root, k, klen);
94+
95+
return p && !memcmp(p->k, k, klen) ? p : NULL;
96+
}
97+
98+
/*
 * Unlink the node whose first "klen" key bytes equal "k" from "t".
 * Returns the unlinked node, or NULL when the tree is empty or the best
 * match does not compare equal.  Nothing is freed here.
 *
 * NOTE(review): the branch removed from the tree is "q" (the parent of the
 * matching leaf), while the node returned is the leaf "p".  cb_insert()
 * stores both the branch and the leaf role in one struct, and nothing here
 * checks that q == p.  When they differ, the branch fields of the returned
 * node appear to stay linked in the tree -- confirm before releasing the
 * returned node's memory.
 */
struct cb_node *cb_unlink(struct cb_tree *t, const uint8_t *k, size_t klen)
99+
{
100+
struct cb_node **wherep = &t->root;
101+
struct cb_node **whereq = NULL;
102+
struct cb_node *q = NULL;
103+
size_t direction = 0;
104+
uint8_t c;
105+
struct cb_node *p = t->root;
106+
107+
if (!p) return NULL; /* empty tree, nothing to delete */
108+
109+
/* traverse to find best match, keeping link to parent */
110+
while (1 & (uintptr_t)p) {
111+
/* whereq: the link that points at branch q */
whereq = wherep;
112+
q = cb_node_of(p);
113+
c = q->byte < klen ? k[q->byte] : 0;
114+
direction = (1 + (q->otherbits | c)) >> 8;
115+
wherep = q->child + direction;
116+
p = *wherep;
117+
}
118+
119+
if (memcmp(p->k, k, klen))
120+
return NULL; /* no match, nothing unlinked */
121+
122+
/* found an exact match */
123+
/* replace branch q by its other child; with no branch, p was the only node */
if (whereq) /* update parent */
124+
*whereq = q->child[1 - direction];
125+
else
126+
t->root = NULL;
127+
return p;
128+
}
129+
130+
/*
 * Recursively visit the subtree at "p": for a tagged pointer, descend
 * into child[0] and then child[1]; for an untagged pointer, call
 * fn(p, arg) and return its result.
 * The child[1] side is skipped once the child[0] side has returned
 * CB_BREAK, and that CB_BREAK is passed back to the caller.
 */
static enum cb_next cb_descend(struct cb_node *p, cb_iter fn, void *arg)
131+
{
132+
if (1 & (uintptr_t)p) {
133+
struct cb_node *q = cb_node_of(p);
134+
enum cb_next n = cb_descend(q->child[0], fn, arg);
135+
136+
return n == CB_BREAK ? n : cb_descend(q->child[1], fn, arg);
137+
} else {
138+
return fn(p, arg);
139+
}
140+
}
141+
142+
/*
 * Call fn(node, arg) for the nodes of "t" found under the best match for
 * the "klen"-byte prefix "kpfx".  Nothing is called when the tree is
 * empty or when the best-match node's first "klen" key bytes differ from
 * "kpfx".  Iteration is done by cb_descend(), so it ends early when "fn"
 * returns CB_BREAK.
 */
void cb_each(struct cb_tree *t, const uint8_t *kpfx, size_t klen,
143+
cb_iter fn, void *arg)
144+
{
145+
struct cb_node *p = t->root;
146+
struct cb_node *top = p;
147+
size_t i = 0;
148+
149+
if (!p) return; /* empty tree */
150+
151+
/* Walk tree, maintaining top pointer */
152+
while (1 & (uintptr_t)p) {
153+
struct cb_node *q = cb_node_of(p);
154+
uint8_t c = q->byte < klen ? kpfx[q->byte] : 0;
155+
size_t direction = (1 + (q->otherbits | c)) >> 8;
156+
157+
p = q->child[direction];
158+
/* only branches that test a byte inside the prefix move "top" down */
if (q->byte < klen)
159+
top = p;
160+
}
161+
162+
/* verify the leaf reached really starts with the prefix */
for (i = 0; i < klen; i++) {
163+
if (p->k[i] != kpfx[i])
164+
return; /* "best" match failed */
165+
}
166+
cb_descend(top, fn, arg);
167+
}

cbtree.h

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
/*
2+
* crit-bit tree implementation, does no allocations internally
3+
* For more information on crit-bit trees: https://cr.yp.to/critbit.html
4+
* Based on Adam Langley's adaptation of Dan Bernstein's public domain code
5+
* git clone https://github.com/agl/critbit.git
6+
*
7+
* This is adapted to store arbitrary data (not just NUL-terminated C strings)
8+
* and allocates no memory internally. The user needs to allocate
9+
* "struct cb_node" and fill cb_node.k[] with arbitrary match data
10+
* for memcmp.
11+
* If "klen" is variable, then it should be embedded into "cb_node.k[]".
12+
* Recursion is bound by the maximum value of "klen" used.
13+
*/
14+
#ifndef CBTREE_H
15+
#define CBTREE_H
16+
17+
#include "git-compat-util.h"
18+
19+
struct cb_node;
20+
/*
 * One tree element, allocated by the caller.  cbtree.c uses the same
 * struct both as a leaf (reached through an untagged pointer, only k[]
 * is read) and as a branch (reached through a pointer with the low bit
 * set, using child[], byte and otherbits).
 */
struct cb_node {
21+
/* child[0] / child[1]: subtrees selected by the tested key bit */
struct cb_node *child[2];
22+
/*
23+
* n.b. uint32_t for `byte' is excessive for OIDs,
24+
* we may consider shorter variants if nothing else gets stored.
25+
*/
26+
/* index into k[] of the byte this branch tests */
uint32_t byte;
27+
/* cb_insert() sets every bit here except the one bit being tested */
uint8_t otherbits;
28+
uint8_t k[FLEX_ARRAY]; /* arbitrary data */
29+
};
30+
31+
/* Handle for a whole tree; "root" is NULL while the tree is empty. */
struct cb_tree {
32+
struct cb_node *root;
33+
};
34+
35+
/*
 * Return value of a cb_iter callback.  Iteration in cbtree.c stops as
 * soon as a callback returns CB_BREAK.
 */
enum cb_next {
36+
CB_CONTINUE = 0,
37+
CB_BREAK = 1
38+
};
39+
40+
/* Static initializer for an empty struct cb_tree. */
#define CBTREE_INIT { .root = NULL }
41+
42+
/* Run-time counterpart of CBTREE_INIT: make "t" an empty tree. */
static inline void cb_init(struct cb_tree *t)
43+
{
44+
t->root = NULL;
45+
}
46+
47+
/* Returns the node whose first "klen" key bytes equal "k", or NULL. */
struct cb_node *cb_lookup(struct cb_tree *, const uint8_t *k, size_t klen);
48+
/* Returns NULL if the node was inserted, or the existing node on a duplicate key. */
struct cb_node *cb_insert(struct cb_tree *, struct cb_node *, size_t klen);
49+
/* Returns the node taken out of the tree, or NULL if there was no match. */
struct cb_node *cb_unlink(struct cb_tree *t, const uint8_t *k, size_t klen);
50+
51+
/* Iteration callback; return CB_BREAK to stop, CB_CONTINUE to go on. */
typedef enum cb_next (*cb_iter)(struct cb_node *, void *arg);
52+
53+
/* Calls the callback for nodes matching the "klen"-byte prefix "kpfx"; "arg" is passed through. */
void cb_each(struct cb_tree *, const uint8_t *kpfx, size_t klen,
54+
cb_iter, void *arg);
55+
56+
#endif /* CBTREE_H */

dir.c

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,11 +78,21 @@ int fspathcmp(const char *a, const char *b)
7878
return ignore_case ? strcasecmp(a, b) : strcmp(a, b);
7979
}
8080

81+
/* Returns non-zero when fspathcmp() reports "a" and "b" as equal, 0 otherwise. */
int fspatheq(const char *a, const char *b)
82+
{
83+
return !fspathcmp(a, b);
84+
}
85+
8186
/*
 * Compare at most "count" bytes of "a" and "b": with strncasecmp() when
 * ignore_case is set, with strncmp() otherwise.
 */
int fspathncmp(const char *a, const char *b, size_t count)
8287
{
8388
return ignore_case ? strncasecmp(a, b, count) : strncmp(a, b, count);
8489
}
8590

91+
/*
 * Hash the path "str": with strihash() when ignore_case is set, with
 * strhash() otherwise, mirroring the case handling of fspathncmp().
 */
unsigned int fspathhash(const char *str)
92+
{
93+
return ignore_case ? strihash(str) : strhash(str);
94+
}
95+
8696
int git_fnmatch(const struct pathspec_item *item,
8797
const char *pattern, const char *string,
8898
int prefix)

dir.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -489,7 +489,9 @@ int remove_dir_recursively(struct strbuf *path, int flag);
489489
int remove_path(const char *path);
490490

491491
/* Path comparison and hashing helpers that honor ignore_case (see dir.c). */
int fspathcmp(const char *a, const char *b);
492+
/* Non-zero when fspathcmp(a, b) == 0. */
int fspatheq(const char *a, const char *b);
492493
int fspathncmp(const char *a, const char *b, size_t count);
494+
unsigned int fspathhash(const char *str);
493495

494496
/*
495497
* The prefix part of pattern must not contains wildcards.

hash.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -265,7 +265,7 @@ static inline void oidcpy(struct object_id *dst, const struct object_id *src)
265265

266266
/* Like oidcpy() but zero-pads the unused bytes in dst's hash array. */
267267
static inline void oidcpy_with_padding(struct object_id *dst,
268-
struct object_id *src)
268+
const struct object_id *src)
269269
{
270270
size_t hashsz;
271271

0 commit comments

Comments
 (0)