@@ -32,38 +32,96 @@ static unsigned int hash_name(const char *name, int namelen)
3232 return hash ;
3333}
3434
35- static void hash_index_entry_directories (struct index_state * istate , struct cache_entry * ce )
35+ struct dir_entry {
36+ struct dir_entry * next ;
37+ struct dir_entry * parent ;
38+ struct cache_entry * ce ;
39+ int nr ;
40+ unsigned int namelen ;
41+ };
42+
43+ static struct dir_entry * find_dir_entry (struct index_state * istate ,
44+ const char * name , unsigned int namelen )
45+ {
46+ unsigned int hash = hash_name (name , namelen );
47+ struct dir_entry * dir ;
48+
49+ for (dir = lookup_hash (hash , & istate -> dir_hash ); dir ; dir = dir -> next )
50+ if (dir -> namelen == namelen &&
51+ !strncasecmp (dir -> ce -> name , name , namelen ))
52+ return dir ;
53+ return NULL ;
54+ }
55+
56+ static struct dir_entry * hash_dir_entry (struct index_state * istate ,
57+ struct cache_entry * ce , int namelen )
3658{
3759 /*
3860 * Throw each directory component in the hash for quick lookup
3961 * during a git status. Directory components are stored with their
4062 * closing slash. Despite submodules being a directory, they never
4163 * reach this point, because they are stored without a closing slash
42- * in the cache .
64+ * in index_state.name_hash (as ordinary cache_entries) .
4365 *
44- * Note that the cache_entry stored with the directory does not
45- * represent the directory itself. It is a pointer to an existing
46- * filename, and its only purpose is to represent existence of the
47- * directory in the cache. It is very possible multiple directory
48- * hash entries may point to the same cache_entry.
66+ * Note that the cache_entry stored with the dir_entry merely
67+ * supplies the name of the directory (up to dir_entry.namelen). We
68+ * track the number of 'active' files in a directory in dir_entry.nr,
69+ * so we can tell if the directory is still relevant, e.g. for git
70+ * status. However, if cache_entries are removed, we cannot pinpoint
71+ * an exact cache_entry that's still active. It is very possible that
72+ * multiple dir_entries point to the same cache_entry.
4973 */
50- unsigned int hash ;
51- void * * pos ;
74+ struct dir_entry * dir ;
75+
76+ /* get length of parent directory */
77+ while (namelen > 0 && !is_dir_sep (ce -> name [namelen - 1 ]))
78+ namelen -- ;
79+ if (namelen <= 0 )
80+ return NULL ;
81+
82+ /* lookup existing entry for that directory */
83+ dir = find_dir_entry (istate , ce -> name , namelen );
84+ if (!dir ) {
85+ /* not found, create it and add to hash table */
86+ void * * pdir ;
87+ unsigned int hash = hash_name (ce -> name , namelen );
5288
53- const char * ptr = ce -> name ;
54- while (* ptr ) {
55- while (* ptr && * ptr != '/' )
56- ++ ptr ;
57- if (* ptr == '/' ) {
58- ++ ptr ;
59- hash = hash_name (ce -> name , ptr - ce -> name );
60- pos = insert_hash (hash , ce , & istate -> name_hash );
61- if (pos ) {
62- ce -> dir_next = * pos ;
63- * pos = ce ;
64- }
89+ dir = xcalloc (1 , sizeof (struct dir_entry ));
90+ dir -> namelen = namelen ;
91+ dir -> ce = ce ;
92+
93+ pdir = insert_hash (hash , dir , & istate -> dir_hash );
94+ if (pdir ) {
95+ dir -> next = * pdir ;
96+ * pdir = dir ;
6597 }
98+
99+ /* recursively add missing parent directories */
100+ dir -> parent = hash_dir_entry (istate , ce , namelen - 1 );
66101 }
102+ return dir ;
103+ }
104+
105+ static void add_dir_entry (struct index_state * istate , struct cache_entry * ce )
106+ {
107+ /* Add reference to the directory entry (and parents if 0). */
108+ struct dir_entry * dir = hash_dir_entry (istate , ce , ce_namelen (ce ));
109+ while (dir && !(dir -> nr ++ ))
110+ dir = dir -> parent ;
111+ }
112+
113+ static void remove_dir_entry (struct index_state * istate , struct cache_entry * ce )
114+ {
115+ /*
116+ * Release reference to the directory entry (and parents if 0).
117+ *
118+ * Note: we do not remove / free the entry because there's no
119+ * hash.[ch]::remove_hash and dir->next may point to other entries
120+ * that are still valid, so we must not free the memory.
121+ */
122+ struct dir_entry * dir = hash_dir_entry (istate , ce , ce_namelen (ce ));
123+ while (dir && dir -> nr && !(-- dir -> nr ))
124+ dir = dir -> parent ;
67125}
68126
69127static void hash_index_entry (struct index_state * istate , struct cache_entry * ce )
@@ -74,16 +132,16 @@ static void hash_index_entry(struct index_state *istate, struct cache_entry *ce)
74132 if (ce -> ce_flags & CE_HASHED )
75133 return ;
76134 ce -> ce_flags |= CE_HASHED ;
77- ce -> next = ce -> dir_next = NULL ;
135+ ce -> next = NULL ;
78136 hash = hash_name (ce -> name , ce_namelen (ce ));
79137 pos = insert_hash (hash , ce , & istate -> name_hash );
80138 if (pos ) {
81139 ce -> next = * pos ;
82140 * pos = ce ;
83141 }
84142
85- if (ignore_case )
86- hash_index_entry_directories (istate , ce );
143+ if (ignore_case && !( ce -> ce_flags & CE_UNHASHED ) )
144+ add_dir_entry (istate , ce );
87145}
88146
89147static void lazy_init_name_hash (struct index_state * istate )
@@ -99,11 +157,33 @@ static void lazy_init_name_hash(struct index_state *istate)
99157
100158void add_name_hash (struct index_state * istate , struct cache_entry * ce )
101159{
160+ /* if already hashed, add reference to directory entries */
161+ if (ignore_case && (ce -> ce_flags & CE_STATE_MASK ) == CE_STATE_MASK )
162+ add_dir_entry (istate , ce );
163+
102164 ce -> ce_flags &= ~CE_UNHASHED ;
103165 if (istate -> name_hash_initialized )
104166 hash_index_entry (istate , ce );
105167}
106168
169+ /*
170+ * We don't actually *remove* it, we can just mark it invalid so that
171+ * we won't find it in lookups.
172+ *
173+ * Not only would we have to search the lists (simple enough), but
174+ * we'd also have to rehash other hash buckets in case this makes the
175+ * hash bucket empty (common). So it's much better to just mark
176+ * it.
177+ */
178+ void remove_name_hash (struct index_state * istate , struct cache_entry * ce )
179+ {
180+ /* if already hashed, release reference to directory entries */
181+ if (ignore_case && (ce -> ce_flags & CE_STATE_MASK ) == CE_HASHED )
182+ remove_dir_entry (istate , ce );
183+
184+ ce -> ce_flags |= CE_UNHASHED ;
185+ }
186+
107187static int slow_same_name (const char * name1 , int len1 , const char * name2 , int len2 )
108188{
109189 if (len1 != len2 )
@@ -137,18 +217,7 @@ static int same_name(const struct cache_entry *ce, const char *name, int namelen
137217 if (!icase )
138218 return 0 ;
139219
140- /*
141- * If the entry we're comparing is a filename (no trailing slash), then compare
142- * the lengths exactly.
143- */
144- if (name [namelen - 1 ] != '/' )
145- return slow_same_name (name , namelen , ce -> name , len );
146-
147- /*
148- * For a directory, we point to an arbitrary cache_entry filename. Just
149- * make sure the directory portion matches.
150- */
151- return slow_same_name (name , namelen , ce -> name , namelen < len ? namelen : len );
220+ return slow_same_name (name , namelen , ce -> name , len );
152221}
153222
154223struct cache_entry * index_name_exists (struct index_state * istate , const char * name , int namelen , int icase )
@@ -164,27 +233,54 @@ struct cache_entry *index_name_exists(struct index_state *istate, const char *na
164233 if (same_name (ce , name , namelen , icase ))
165234 return ce ;
166235 }
167- if (icase && name [namelen - 1 ] == '/' )
168- ce = ce -> dir_next ;
169- else
170- ce = ce -> next ;
236+ ce = ce -> next ;
171237 }
172238
173239 /*
174- * Might be a submodule. Despite submodules being directories,
240+ * When looking for a directory (trailing '/'), it might be a
241+ * submodule or a directory. Despite submodules being directories,
175242 * they are stored in the name hash without a closing slash.
176- * When ignore_case is 1, directories are stored in the name hash
177- * with their closing slash.
243+ * When ignore_case is 1, directories are stored in a separate hash
244+ * table * with* their closing slash.
178245 *
179246 * The side effect of this storage technique is we have need to
247+ * lookup the directory in a separate hash table, and if not found
180248 * remove the slash from name and perform the lookup again without
181249 * the slash. If a match is made, S_ISGITLINK(ce->mode) will be
182250 * true.
183251 */
184252 if (icase && name [namelen - 1 ] == '/' ) {
253+ struct dir_entry * dir = find_dir_entry (istate , name , namelen );
254+ if (dir && dir -> nr )
255+ return dir -> ce ;
256+
185257 ce = index_name_exists (istate , name , namelen - 1 , icase );
186258 if (ce && S_ISGITLINK (ce -> ce_mode ))
187259 return ce ;
188260 }
189261 return NULL ;
190262}
263+
264+ static int free_dir_entry (void * entry , void * unused )
265+ {
266+ struct dir_entry * dir = entry ;
267+ while (dir ) {
268+ struct dir_entry * next = dir -> next ;
269+ free (dir );
270+ dir = next ;
271+ }
272+ return 0 ;
273+ }
274+
275+ void free_name_hash (struct index_state * istate )
276+ {
277+ if (!istate -> name_hash_initialized )
278+ return ;
279+ istate -> name_hash_initialized = 0 ;
280+ if (ignore_case )
281+ /* free directory entries */
282+ for_each_hash (& istate -> dir_hash , free_dir_entry , NULL );
283+
284+ free_hash (& istate -> name_hash );
285+ free_hash (& istate -> dir_hash );
286+ }
0 commit comments