Skip to content

Commit 1a600b7

Browse files
bmwillgitster
authored andcommitted
attr: use hashmap for attribute dictionary
The current implementation of the attribute dictionary uses a custom hashtable. This modernizes the dictionary by converting it to the builtin 'hashmap' structure. Also, in order to enable a threaded API in the future add an accompanying mutex which must be acquired prior to accessing the dictionary of interned attributes. Signed-off-by: Brandon Williams <[email protected]> Signed-off-by: Junio C Hamano <[email protected]>
1 parent 428103c commit 1a600b7

File tree

3 files changed

+133
-45
lines changed

3 files changed

+133
-45
lines changed

attr.c

Lines changed: 128 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
#include "dir.h"
1515
#include "utf8.h"
1616
#include "quote.h"
17+
#include "thread-utils.h"
1718

1819
const char git_attr__true[] = "(builtin)true";
1920
const char git_attr__false[] = "\0(builtin)false";
@@ -23,28 +24,17 @@ static const char git_attr__unknown[] = "(builtin)unknown";
2324
#define ATTR__UNSET NULL
2425
#define ATTR__UNKNOWN git_attr__unknown
2526

26-
/* This is a randomly chosen prime. */
27-
#define HASHSIZE 257
28-
2927
#ifndef DEBUG_ATTR
3028
#define DEBUG_ATTR 0
3129
#endif
3230

33-
/*
34-
* NEEDSWORK: the global dictionary of the interned attributes
35-
* must stay a singleton even after we become thread-ready.
36-
* Access to these must be surrounded with mutex when it happens.
37-
*/
3831
struct git_attr {
39-
struct git_attr *next;
40-
unsigned h;
41-
int attr_nr;
32+
int attr_nr; /* unique attribute number */
4233
int maybe_macro;
4334
int maybe_real;
44-
char name[FLEX_ARRAY];
35+
char name[FLEX_ARRAY]; /* attribute name */
4536
};
4637
static int attr_nr;
47-
static struct git_attr *(git_attr_hash[HASHSIZE]);
4838

4939
/*
5040
* NEEDSWORK: maybe-real, maybe-macro are not property of
@@ -63,15 +53,94 @@ const char *git_attr_name(const struct git_attr *attr)
6353
return attr->name;
6454
}
6555

66-
static unsigned hash_name(const char *name, int namelen)
56+
struct attr_hashmap {
57+
struct hashmap map;
58+
#ifndef NO_PTHREADS
59+
pthread_mutex_t mutex;
60+
#endif
61+
};
62+
63+
static inline void hashmap_lock(struct attr_hashmap *map)
64+
{
65+
#ifndef NO_PTHREADS
66+
pthread_mutex_lock(&map->mutex);
67+
#endif
68+
}
69+
70+
static inline void hashmap_unlock(struct attr_hashmap *map)
6771
{
68-
unsigned val = 0, c;
72+
#ifndef NO_PTHREADS
73+
pthread_mutex_unlock(&map->mutex);
74+
#endif
75+
}
6976

70-
while (namelen--) {
71-
c = *name++;
72-
val = ((val << 7) | (val >> 22)) ^ c;
73-
}
74-
return val;
77+
/*
78+
* The global dictionary of all interned attributes. This
79+
* is a singleton object which is shared between threads.
80+
* Access to this dictionary must be surrounded with a mutex.
81+
*/
82+
static struct attr_hashmap g_attr_hashmap;
83+
84+
/* The container for objects stored in "struct attr_hashmap" */
85+
struct attr_hash_entry {
86+
struct hashmap_entry ent; /* must be the first member! */
87+
const char *key; /* the key; memory should be owned by value */
88+
size_t keylen; /* length of the key */
89+
void *value; /* the stored value */
90+
};
91+
92+
/* attr_hashmap comparison function */
93+
static int attr_hash_entry_cmp(const struct attr_hash_entry *a,
94+
const struct attr_hash_entry *b,
95+
void *unused)
96+
{
97+
return (a->keylen != b->keylen) || strncmp(a->key, b->key, a->keylen);
98+
}
99+
100+
/* Initialize an 'attr_hashmap' object */
101+
static void attr_hashmap_init(struct attr_hashmap *map)
102+
{
103+
hashmap_init(&map->map, (hashmap_cmp_fn) attr_hash_entry_cmp, 0);
104+
}
105+
106+
/*
107+
* Retrieve the 'value' stored in a hashmap given the provided 'key'.
108+
* If there is no matching entry, return NULL.
109+
*/
110+
static void *attr_hashmap_get(struct attr_hashmap *map,
111+
const char *key, size_t keylen)
112+
{
113+
struct attr_hash_entry k;
114+
struct attr_hash_entry *e;
115+
116+
if (!map->map.tablesize)
117+
attr_hashmap_init(map);
118+
119+
hashmap_entry_init(&k, memhash(key, keylen));
120+
k.key = key;
121+
k.keylen = keylen;
122+
e = hashmap_get(&map->map, &k, NULL);
123+
124+
return e ? e->value : NULL;
125+
}
126+
127+
/* Add 'value' to a hashmap based on the provided 'key'. */
128+
static void attr_hashmap_add(struct attr_hashmap *map,
129+
const char *key, size_t keylen,
130+
void *value)
131+
{
132+
struct attr_hash_entry *e;
133+
134+
if (!map->map.tablesize)
135+
attr_hashmap_init(map);
136+
137+
e = xmalloc(sizeof(struct attr_hash_entry));
138+
hashmap_entry_init(e, memhash(key, keylen));
139+
e->key = key;
140+
e->keylen = keylen;
141+
e->value = value;
142+
143+
hashmap_add(&map->map, e);
75144
}
76145

77146
static int attr_name_valid(const char *name, size_t namelen)
@@ -103,37 +172,44 @@ static void report_invalid_attr(const char *name, size_t len,
103172
strbuf_release(&err);
104173
}
105174

106-
static struct git_attr *git_attr_internal(const char *name, int len)
175+
/*
176+
* Given a 'name', lookup and return the corresponding attribute in the global
177+
* dictionary. If no entry is found, create a new attribute and store it in
178+
* the dictionary.
179+
*/
180+
static struct git_attr *git_attr_internal(const char *name, int namelen)
107181
{
108-
unsigned hval = hash_name(name, len);
109-
unsigned pos = hval % HASHSIZE;
110182
struct git_attr *a;
111183

112-
for (a = git_attr_hash[pos]; a; a = a->next) {
113-
if (a->h == hval &&
114-
!memcmp(a->name, name, len) && !a->name[len])
115-
return a;
116-
}
117-
118-
if (!attr_name_valid(name, len))
184+
if (!attr_name_valid(name, namelen))
119185
return NULL;
120186

121-
FLEX_ALLOC_MEM(a, name, name, len);
122-
a->h = hval;
123-
a->next = git_attr_hash[pos];
124-
a->attr_nr = attr_nr++;
125-
a->maybe_macro = 0;
126-
a->maybe_real = 0;
127-
git_attr_hash[pos] = a;
187+
hashmap_lock(&g_attr_hashmap);
188+
189+
a = attr_hashmap_get(&g_attr_hashmap, name, namelen);
190+
191+
if (!a) {
192+
FLEX_ALLOC_MEM(a, name, name, namelen);
193+
a->attr_nr = g_attr_hashmap.map.size;
194+
a->maybe_real = 0;
195+
a->maybe_macro = 0;
196+
197+
attr_hashmap_add(&g_attr_hashmap, a->name, namelen, a);
198+
assert(a->attr_nr == (g_attr_hashmap.map.size - 1));
199+
200+
/*
201+
* NEEDSWORK: per git_attr_check check_all_attr
202+
* will be initialized a lot more lazily, not
203+
* like this, and not here.
204+
*/
205+
REALLOC_ARRAY(check_all_attr, ++attr_nr);
206+
check_all_attr[a->attr_nr].attr = a;
207+
check_all_attr[a->attr_nr].value = ATTR__UNKNOWN;
208+
assert(a->attr_nr == (attr_nr - 1));
209+
}
210+
211+
hashmap_unlock(&g_attr_hashmap);
128212

129-
/*
130-
* NEEDSWORK: per git_attr_check check_all_attr
131-
* will be initialized a lot more lazily, not
132-
* like this, and not here.
133-
*/
134-
REALLOC_ARRAY(check_all_attr, attr_nr);
135-
check_all_attr[a->attr_nr].attr = a;
136-
check_all_attr[a->attr_nr].value = ATTR__UNKNOWN;
137213
return a;
138214
}
139215

@@ -941,3 +1017,10 @@ void git_attr_set_direction(enum git_attr_direction new, struct index_state *ist
9411017
drop_attr_stack();
9421018
use_index = istate;
9431019
}
1020+
1021+
void attr_start(void)
1022+
{
1023+
#ifndef NO_PTHREADS
1024+
pthread_mutex_init(&g_attr_hashmap.mutex, NULL);
1025+
#endif
1026+
}

attr.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,4 +67,6 @@ enum git_attr_direction {
6767
};
6868
void git_attr_set_direction(enum git_attr_direction, struct index_state *);
6969

70+
extern void attr_start(void);
71+
7072
#endif /* ATTR_H */

common-main.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
#include "cache.h"
22
#include "exec_cmd.h"
3+
#include "attr.h"
34

45
/*
56
* Many parts of Git have subprograms communicate via pipe, expect the
@@ -33,6 +34,8 @@ int main(int argc, const char **argv)
3334

3435
git_setup_gettext();
3536

37+
attr_start();
38+
3639
git_extract_argv0_path(argv[0]);
3740

3841
restore_sigpipe_to_default();

0 commit comments

Comments
 (0)