Skip to content

Commit c81e2c6

Browse files
committed
Merge branch 'kb/name-hash' into maint-1.8.1
* kb/name-hash: name-hash.c: fix endless loop with core.ignorecase=true
2 parents 6437980 + 2092678 commit c81e2c6

File tree

4 files changed

+166
-62
lines changed

4 files changed

+166
-62
lines changed

cache.h

Lines changed: 3 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -131,7 +131,6 @@ struct cache_entry {
131131
unsigned int ce_namelen;
132132
unsigned char sha1[20];
133133
struct cache_entry *next;
134-
struct cache_entry *dir_next;
135134
char name[FLEX_ARRAY]; /* more */
136135
};
137136

@@ -267,25 +266,15 @@ struct index_state {
267266
unsigned name_hash_initialized : 1,
268267
initialized : 1;
269268
struct hash_table name_hash;
269+
struct hash_table dir_hash;
270270
};
271271

272272
extern struct index_state the_index;
273273

274274
/* Name hashing */
275275
extern void add_name_hash(struct index_state *istate, struct cache_entry *ce);
276-
/*
277-
* We don't actually *remove* it, we can just mark it invalid so that
278-
* we won't find it in lookups.
279-
*
280-
* Not only would we have to search the lists (simple enough), but
281-
* we'd also have to rehash other hash buckets in case this makes the
282-
* hash bucket empty (common). So it's much better to just mark
283-
* it.
284-
*/
285-
static inline void remove_name_hash(struct cache_entry *ce)
286-
{
287-
ce->ce_flags |= CE_UNHASHED;
288-
}
276+
extern void remove_name_hash(struct index_state *istate, struct cache_entry *ce);
277+
extern void free_name_hash(struct index_state *istate);
289278

290279

291280
#ifndef NO_THE_INDEX_COMPATIBILITY_MACROS

name-hash.c

Lines changed: 139 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -32,38 +32,96 @@ static unsigned int hash_name(const char *name, int namelen)
3232
return hash;
3333
}
3434

35-
static void hash_index_entry_directories(struct index_state *istate, struct cache_entry *ce)
35+
struct dir_entry {
36+
struct dir_entry *next;
37+
struct dir_entry *parent;
38+
struct cache_entry *ce;
39+
int nr;
40+
unsigned int namelen;
41+
};
42+
43+
static struct dir_entry *find_dir_entry(struct index_state *istate,
44+
const char *name, unsigned int namelen)
45+
{
46+
unsigned int hash = hash_name(name, namelen);
47+
struct dir_entry *dir;
48+
49+
for (dir = lookup_hash(hash, &istate->dir_hash); dir; dir = dir->next)
50+
if (dir->namelen == namelen &&
51+
!strncasecmp(dir->ce->name, name, namelen))
52+
return dir;
53+
return NULL;
54+
}
55+
56+
static struct dir_entry *hash_dir_entry(struct index_state *istate,
57+
struct cache_entry *ce, int namelen)
3658
{
3759
/*
3860
* Throw each directory component in the hash for quick lookup
3961
* during a git status. Directory components are stored with their
4062
* closing slash. Despite submodules being a directory, they never
4163
* reach this point, because they are stored without a closing slash
42-
* in the cache.
64+
* in index_state.name_hash (as ordinary cache_entries).
4365
*
44-
* Note that the cache_entry stored with the directory does not
45-
* represent the directory itself. It is a pointer to an existing
46-
* filename, and its only purpose is to represent existence of the
47-
* directory in the cache. It is very possible multiple directory
48-
* hash entries may point to the same cache_entry.
66+
* Note that the cache_entry stored with the dir_entry merely
67+
* supplies the name of the directory (up to dir_entry.namelen). We
68+
* track the number of 'active' files in a directory in dir_entry.nr,
69+
* so we can tell if the directory is still relevant, e.g. for git
70+
* status. However, if cache_entries are removed, we cannot pinpoint
71+
* an exact cache_entry that's still active. It is very possible that
72+
* multiple dir_entries point to the same cache_entry.
4973
*/
50-
unsigned int hash;
51-
void **pos;
74+
struct dir_entry *dir;
75+
76+
/* get length of parent directory */
77+
while (namelen > 0 && !is_dir_sep(ce->name[namelen - 1]))
78+
namelen--;
79+
if (namelen <= 0)
80+
return NULL;
81+
82+
/* lookup existing entry for that directory */
83+
dir = find_dir_entry(istate, ce->name, namelen);
84+
if (!dir) {
85+
/* not found, create it and add to hash table */
86+
void **pdir;
87+
unsigned int hash = hash_name(ce->name, namelen);
5288

53-
const char *ptr = ce->name;
54-
while (*ptr) {
55-
while (*ptr && *ptr != '/')
56-
++ptr;
57-
if (*ptr == '/') {
58-
++ptr;
59-
hash = hash_name(ce->name, ptr - ce->name);
60-
pos = insert_hash(hash, ce, &istate->name_hash);
61-
if (pos) {
62-
ce->dir_next = *pos;
63-
*pos = ce;
64-
}
89+
dir = xcalloc(1, sizeof(struct dir_entry));
90+
dir->namelen = namelen;
91+
dir->ce = ce;
92+
93+
pdir = insert_hash(hash, dir, &istate->dir_hash);
94+
if (pdir) {
95+
dir->next = *pdir;
96+
*pdir = dir;
6597
}
98+
99+
/* recursively add missing parent directories */
100+
dir->parent = hash_dir_entry(istate, ce, namelen - 1);
66101
}
102+
return dir;
103+
}
104+
105+
static void add_dir_entry(struct index_state *istate, struct cache_entry *ce)
106+
{
107+
/* Add reference to the directory entry (and parents if 0). */
108+
struct dir_entry *dir = hash_dir_entry(istate, ce, ce_namelen(ce));
109+
while (dir && !(dir->nr++))
110+
dir = dir->parent;
111+
}
112+
113+
static void remove_dir_entry(struct index_state *istate, struct cache_entry *ce)
114+
{
115+
/*
116+
* Release reference to the directory entry (and parents if 0).
117+
*
118+
* Note: we do not remove / free the entry because there's no
119+
* hash.[ch]::remove_hash and dir->next may point to other entries
120+
* that are still valid, so we must not free the memory.
121+
*/
122+
struct dir_entry *dir = hash_dir_entry(istate, ce, ce_namelen(ce));
123+
while (dir && dir->nr && !(--dir->nr))
124+
dir = dir->parent;
67125
}
68126

69127
static void hash_index_entry(struct index_state *istate, struct cache_entry *ce)
@@ -74,16 +132,16 @@ static void hash_index_entry(struct index_state *istate, struct cache_entry *ce)
74132
if (ce->ce_flags & CE_HASHED)
75133
return;
76134
ce->ce_flags |= CE_HASHED;
77-
ce->next = ce->dir_next = NULL;
135+
ce->next = NULL;
78136
hash = hash_name(ce->name, ce_namelen(ce));
79137
pos = insert_hash(hash, ce, &istate->name_hash);
80138
if (pos) {
81139
ce->next = *pos;
82140
*pos = ce;
83141
}
84142

85-
if (ignore_case)
86-
hash_index_entry_directories(istate, ce);
143+
if (ignore_case && !(ce->ce_flags & CE_UNHASHED))
144+
add_dir_entry(istate, ce);
87145
}
88146

89147
static void lazy_init_name_hash(struct index_state *istate)
@@ -99,11 +157,33 @@ static void lazy_init_name_hash(struct index_state *istate)
99157

100158
void add_name_hash(struct index_state *istate, struct cache_entry *ce)
101159
{
160+
/* if already hashed, add reference to directory entries */
161+
if (ignore_case && (ce->ce_flags & CE_STATE_MASK) == CE_STATE_MASK)
162+
add_dir_entry(istate, ce);
163+
102164
ce->ce_flags &= ~CE_UNHASHED;
103165
if (istate->name_hash_initialized)
104166
hash_index_entry(istate, ce);
105167
}
106168

169+
/*
170+
* We don't actually *remove* it, we can just mark it invalid so that
171+
* we won't find it in lookups.
172+
*
173+
* Not only would we have to search the lists (simple enough), but
174+
* we'd also have to rehash other hash buckets in case this makes the
175+
* hash bucket empty (common). So it's much better to just mark
176+
* it.
177+
*/
178+
void remove_name_hash(struct index_state *istate, struct cache_entry *ce)
179+
{
180+
/* if already hashed, release reference to directory entries */
181+
if (ignore_case && (ce->ce_flags & CE_STATE_MASK) == CE_HASHED)
182+
remove_dir_entry(istate, ce);
183+
184+
ce->ce_flags |= CE_UNHASHED;
185+
}
186+
107187
static int slow_same_name(const char *name1, int len1, const char *name2, int len2)
108188
{
109189
if (len1 != len2)
@@ -137,18 +217,7 @@ static int same_name(const struct cache_entry *ce, const char *name, int namelen
137217
if (!icase)
138218
return 0;
139219

140-
/*
141-
* If the entry we're comparing is a filename (no trailing slash), then compare
142-
* the lengths exactly.
143-
*/
144-
if (name[namelen - 1] != '/')
145-
return slow_same_name(name, namelen, ce->name, len);
146-
147-
/*
148-
* For a directory, we point to an arbitrary cache_entry filename. Just
149-
* make sure the directory portion matches.
150-
*/
151-
return slow_same_name(name, namelen, ce->name, namelen < len ? namelen : len);
220+
return slow_same_name(name, namelen, ce->name, len);
152221
}
153222

154223
struct cache_entry *index_name_exists(struct index_state *istate, const char *name, int namelen, int icase)
@@ -164,27 +233,54 @@ struct cache_entry *index_name_exists(struct index_state *istate, const char *na
164233
if (same_name(ce, name, namelen, icase))
165234
return ce;
166235
}
167-
if (icase && name[namelen - 1] == '/')
168-
ce = ce->dir_next;
169-
else
170-
ce = ce->next;
236+
ce = ce->next;
171237
}
172238

173239
/*
174-
* Might be a submodule. Despite submodules being directories,
240+
* When looking for a directory (trailing '/'), it might be a
241+
* submodule or a directory. Despite submodules being directories,
175242
* they are stored in the name hash without a closing slash.
176-
* When ignore_case is 1, directories are stored in the name hash
177-
* with their closing slash.
243+
* When ignore_case is 1, directories are stored in a separate hash
244+
* table *with* their closing slash.
178245
*
179246
* The side effect of this storage technique is we have need to
247+
* lookup the directory in a separate hash table, and if not found
180248
* remove the slash from name and perform the lookup again without
181249
* the slash. If a match is made, S_ISGITLINK(ce->mode) will be
182250
* true.
183251
*/
184252
if (icase && name[namelen - 1] == '/') {
253+
struct dir_entry *dir = find_dir_entry(istate, name, namelen);
254+
if (dir && dir->nr)
255+
return dir->ce;
256+
185257
ce = index_name_exists(istate, name, namelen - 1, icase);
186258
if (ce && S_ISGITLINK(ce->ce_mode))
187259
return ce;
188260
}
189261
return NULL;
190262
}
263+
264+
static int free_dir_entry(void *entry, void *unused)
265+
{
266+
struct dir_entry *dir = entry;
267+
while (dir) {
268+
struct dir_entry *next = dir->next;
269+
free(dir);
270+
dir = next;
271+
}
272+
return 0;
273+
}
274+
275+
void free_name_hash(struct index_state *istate)
276+
{
277+
if (!istate->name_hash_initialized)
278+
return;
279+
istate->name_hash_initialized = 0;
280+
if (ignore_case)
281+
/* free directory entries */
282+
for_each_hash(&istate->dir_hash, free_dir_entry, NULL);
283+
284+
free_hash(&istate->name_hash);
285+
free_hash(&istate->dir_hash);
286+
}

read-cache.c

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ static void replace_index_entry(struct index_state *istate, int nr, struct cache
4646
{
4747
struct cache_entry *old = istate->cache[nr];
4848

49-
remove_name_hash(old);
49+
remove_name_hash(istate, old);
5050
set_index_entry(istate, nr, ce);
5151
istate->cache_changed = 1;
5252
}
@@ -456,7 +456,7 @@ int remove_index_entry_at(struct index_state *istate, int pos)
456456
struct cache_entry *ce = istate->cache[pos];
457457

458458
record_resolve_undo(istate, ce);
459-
remove_name_hash(ce);
459+
remove_name_hash(istate, ce);
460460
istate->cache_changed = 1;
461461
istate->cache_nr--;
462462
if (pos >= istate->cache_nr)
@@ -479,7 +479,7 @@ void remove_marked_cache_entries(struct index_state *istate)
479479

480480
for (i = j = 0; i < istate->cache_nr; i++) {
481481
if (ce_array[i]->ce_flags & CE_REMOVE)
482-
remove_name_hash(ce_array[i]);
482+
remove_name_hash(istate, ce_array[i]);
483483
else
484484
ce_array[j++] = ce_array[i];
485485
}
@@ -1511,8 +1511,7 @@ int discard_index(struct index_state *istate)
15111511
istate->cache_changed = 0;
15121512
istate->timestamp.sec = 0;
15131513
istate->timestamp.nsec = 0;
1514-
istate->name_hash_initialized = 0;
1515-
free_hash(&istate->name_hash);
1514+
free_name_hash(istate);
15161515
cache_tree_free(&(istate->cache_tree));
15171516
istate->initialized = 0;
15181517

t/t7062-wtstatus-ignorecase.sh

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
#!/bin/sh
2+
3+
test_description='git-status with core.ignorecase=true'
4+
5+
. ./test-lib.sh
6+
7+
test_expect_success 'status with hash collisions' '
8+
# note: "V/", "V/XQANY/" and "WURZAUP/" produce the same hash code
9+
# in name-hash.c::hash_name
10+
mkdir V &&
11+
mkdir V/XQANY &&
12+
mkdir WURZAUP &&
13+
touch V/XQANY/test &&
14+
git config core.ignorecase true &&
15+
git add . &&
16+
# test is successful if git status completes (no endless loop)
17+
git status
18+
'
19+
20+
test_done

0 commit comments

Comments
 (0)