Skip to content

Commit 432ad41

Browse files
mhagger authored and gitster committed
refs: store references hierarchically
Store references hierarchically in a tree that matches the pseudo-directory structure of the reference names. Add a new kind of ref_entry (with flag REF_DIR) to represent a whole subdirectory of references. Sort ref_dirs one subdirectory at a time.

NOTE: the dirs can now be sorted as a side-effect of other function calls. Therefore, it would be problematic to do something from an each_ref_fn callback that could provoke the sorting of a directory that is currently being iterated over (i.e., the directory containing the entry that is being processed or any of its parents).

This is a bit far-fetched, because a directory is always sorted just before being iterated over. Therefore, read-only accesses cannot trigger the sorting of a directory whose iteration has already started. But if a callback function would add a reference to a parent directory of the reference in the iteration, then try to resolve a reference under that directory, a re-sort could be triggered and cause the iteration to work incorrectly.

Nevertheless...add a comment in refs.h warning against modifications during iteration.

Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
1 parent 81a79d8 commit 432ad41

File tree

2 files changed: 232 additions and 49 deletions.

refs.c

Lines changed: 227 additions & 47 deletions
Original file line number | Diff line number | Diff line change
@@ -120,15 +120,54 @@ struct ref_dir {
120120
struct ref_entry **entries;
121121
};
122122

123-
/* ISSYMREF=0x01, ISPACKED=0x02 and ISBROKEN=0x04 are public interfaces */
124-
#define REF_KNOWS_PEELED 0x10
123+
/* ISSYMREF=0x01, ISPACKED=0x02, and ISBROKEN=0x04 are public interfaces */
124+
#define REF_KNOWS_PEELED 0x08
125+
#define REF_DIR 0x10
125126

127+
/*
128+
* A ref_entry represents either a reference or a "subdirectory" of
129+
* references. Each directory in the reference namespace is
130+
* represented by a ref_entry with (flags & REF_DIR) set and
131+
* containing a subdir member that holds the entries in that
132+
* directory. References are represented by a ref_entry with (flags &
133+
* REF_DIR) unset and a value member that describes the reference's
134+
* value. The flag member is at the ref_entry level, but it is also
135+
* needed to interpret the contents of the value field (in other
136+
* words, a ref_value object is not very much use without the
137+
* enclosing ref_entry).
138+
*
139+
* Reference names cannot end with slash and directories' names are
140+
* always stored with a trailing slash (except for the top-level
141+
* directory, which is always denoted by ""). This has two nice
142+
* consequences: (1) when the entries in each subdir are sorted
143+
* lexicographically by name (as they usually are), the references in
144+
* a whole tree can be generated in lexicographic order by traversing
145+
* the tree in left-to-right, depth-first order; (2) the names of
146+
* references and subdirectories cannot conflict, and therefore the
147+
* presence of an empty subdirectory does not block the creation of a
148+
* similarly-named reference. (The fact that reference names with the
149+
* same leading components can conflict *with each other* is a
150+
* separate issue that is regulated by is_refname_available().)
151+
*
152+
* Please note that the name field contains the fully-qualified
153+
* reference (or subdirectory) name. Space could be saved by only
154+
* storing the relative names. But that would require the full names
155+
* to be generated on the fly when iterating in do_for_each_ref(), and
156+
* would break callback functions, who have always been able to assume
157+
* that the name strings that they are passed will not be freed during
158+
* the iteration.
159+
*/
126160
struct ref_entry {
127161
unsigned char flag; /* ISSYMREF? ISPACKED? */
128162
union {
129-
struct ref_value value;
163+
struct ref_value value; /* if not (flags&REF_DIR) */
164+
struct ref_dir subdir; /* if (flags&REF_DIR) */
130165
} u;
131-
/* The full name of the reference (e.g., "refs/heads/master"): */
166+
/*
167+
* The full name of the reference (e.g., "refs/heads/master")
168+
* or the full name of the directory with a trailing slash
169+
* (e.g., "refs/heads/"):
170+
*/
132171
char name[FLEX_ARRAY];
133172
};
134173

@@ -151,18 +190,29 @@ static struct ref_entry *create_ref_entry(const char *refname,
151190
return ref;
152191
}
153192

193+
static void clear_ref_dir(struct ref_dir *dir);
194+
154195
static void free_ref_entry(struct ref_entry *entry)
155196
{
197+
if (entry->flag & REF_DIR)
198+
clear_ref_dir(&entry->u.subdir);
156199
free(entry);
157200
}
158201

159-
/* Add a ref_entry to the end of the ref_dir (unsorted). */
160-
static void add_ref(struct ref_dir *refs, struct ref_entry *ref)
202+
/*
203+
* Add a ref_entry to the end of dir (unsorted). Entry is always
204+
* stored directly in dir; no recursion into subdirectories is
205+
* done.
206+
*/
207+
static void add_entry_to_dir(struct ref_dir *dir, struct ref_entry *entry)
161208
{
162-
ALLOC_GROW(refs->entries, refs->nr + 1, refs->alloc);
163-
refs->entries[refs->nr++] = ref;
209+
ALLOC_GROW(dir->entries, dir->nr + 1, dir->alloc);
210+
dir->entries[dir->nr++] = entry;
164211
}
165212

213+
/*
214+
* Clear and free all entries in dir, recursively.
215+
*/
166216
static void clear_ref_dir(struct ref_dir *dir)
167217
{
168218
int i;
@@ -173,6 +223,21 @@ static void clear_ref_dir(struct ref_dir *dir)
173223
dir->entries = NULL;
174224
}
175225

226+
/*
227+
* Create a struct ref_entry object for the specified dirname.
228+
* dirname is the name of the directory with a trailing slash (e.g.,
229+
* "refs/heads/") or "" for the top-level directory.
230+
*/
231+
static struct ref_entry *create_dir_entry(const char *dirname)
232+
{
233+
struct ref_entry *direntry;
234+
int len = strlen(dirname);
235+
direntry = xcalloc(1, sizeof(struct ref_entry) + len + 1);
236+
memcpy(direntry->name, dirname, len + 1);
237+
direntry->flag = REF_DIR;
238+
return direntry;
239+
}
240+
176241
static int ref_entry_cmp(const void *a, const void *b)
177242
{
178243
struct ref_entry *one = *(struct ref_entry **)a;
@@ -182,17 +247,21 @@ static int ref_entry_cmp(const void *a, const void *b)
182247

183248
static void sort_ref_dir(struct ref_dir *dir);
184249

250+
/*
251+
* Return the entry with the given refname from the ref_dir
252+
* (non-recursively), sorting dir if necessary. Return NULL if no
253+
* such entry is found.
254+
*/
185255
static struct ref_entry *search_ref_dir(struct ref_dir *dir, const char *refname)
186256
{
187257
struct ref_entry *e, **r;
188258
int len;
189259

190-
if (refname == NULL)
260+
if (refname == NULL || !dir->nr)
191261
return NULL;
192262

193-
if (!dir->nr)
194-
return NULL;
195263
sort_ref_dir(dir);
264+
196265
len = strlen(refname) + 1;
197266
e = xmalloc(sizeof(struct ref_entry) + len);
198267
memcpy(e->name, refname, len);
@@ -207,28 +276,97 @@ static struct ref_entry *search_ref_dir(struct ref_dir *dir, const char *refname
207276
return *r;
208277
}
209278

279+
/*
280+
* If refname is a reference name, find the ref_dir within the dir
281+
* tree that should hold refname. If refname is a directory name
282+
* (i.e., ends in '/'), then return that ref_dir itself. dir must
283+
* represent the top-level directory. Sort ref_dirs and recurse into
284+
* subdirectories as necessary. If mkdir is set, then create any
285+
* missing directories; otherwise, return NULL if the desired
286+
* directory cannot be found.
287+
*/
288+
static struct ref_dir *find_containing_dir(struct ref_dir *dir,
289+
const char *refname, int mkdir)
290+
{
291+
char *refname_copy = xstrdup(refname);
292+
char *slash;
293+
struct ref_entry *entry;
294+
for (slash = strchr(refname_copy, '/'); slash; slash = strchr(slash + 1, '/')) {
295+
char tmp = slash[1];
296+
slash[1] = '\0';
297+
entry = search_ref_dir(dir, refname_copy);
298+
if (!entry) {
299+
if (!mkdir) {
300+
dir = NULL;
301+
break;
302+
}
303+
entry = create_dir_entry(refname_copy);
304+
add_entry_to_dir(dir, entry);
305+
}
306+
slash[1] = tmp;
307+
assert(entry->flag & REF_DIR);
308+
dir = &entry->u.subdir;
309+
}
310+
311+
free(refname_copy);
312+
return dir;
313+
}
314+
315+
/*
316+
* Find the value entry with the given name in dir, sorting ref_dirs
317+
* and recursing into subdirectories as necessary. If the name is not
318+
* found or it corresponds to a directory entry, return NULL.
319+
*/
320+
static struct ref_entry *find_ref(struct ref_dir *dir, const char *refname)
321+
{
322+
struct ref_entry *entry;
323+
dir = find_containing_dir(dir, refname, 0);
324+
if (!dir)
325+
return NULL;
326+
entry = search_ref_dir(dir, refname);
327+
return (entry && !(entry->flag & REF_DIR)) ? entry : NULL;
328+
}
329+
330+
/*
331+
* Add a ref_entry to the ref_dir (unsorted), recursing into
332+
* subdirectories as necessary. dir must represent the top-level
333+
* directory. Return 0 on success.
334+
*/
335+
static int add_ref(struct ref_dir *dir, struct ref_entry *ref)
336+
{
337+
dir = find_containing_dir(dir, ref->name, 1);
338+
if (!dir)
339+
return -1;
340+
add_entry_to_dir(dir, ref);
341+
return 0;
342+
}
343+
210344
/*
211345
* Emit a warning and return true iff ref1 and ref2 have the same name
212346
* and the same sha1. Die if they have the same name but different
213347
* sha1s.
214348
*/
215349
static int is_dup_ref(const struct ref_entry *ref1, const struct ref_entry *ref2)
216350
{
217-
if (!strcmp(ref1->name, ref2->name)) {
218-
/* Duplicate name; make sure that the SHA1s match: */
219-
if (hashcmp(ref1->u.value.sha1, ref2->u.value.sha1))
220-
die("Duplicated ref, and SHA1s don't match: %s",
221-
ref1->name);
222-
warning("Duplicated ref: %s", ref1->name);
223-
return 1;
224-
} else {
351+
if (strcmp(ref1->name, ref2->name))
225352
return 0;
226-
}
353+
354+
/* Duplicate name; make sure that they don't conflict: */
355+
356+
if ((ref1->flag & REF_DIR) || (ref2->flag & REF_DIR))
357+
/* This is impossible by construction */
358+
die("Reference directory conflict: %s", ref1->name);
359+
360+
if (hashcmp(ref1->u.value.sha1, ref2->u.value.sha1))
361+
die("Duplicated ref, and SHA1s don't match: %s", ref1->name);
362+
363+
warning("Duplicated ref: %s", ref1->name);
364+
return 1;
227365
}
228366

229367
/*
230-
* Sort the entries in dir (if they are not already sorted)
231-
* and remove any duplicate entries.
368+
* Sort the entries in dir non-recursively (if they are not already
369+
* sorted) and remove any duplicate entries.
232370
*/
233371
static void sort_ref_dir(struct ref_dir *dir)
234372
{
@@ -282,8 +420,9 @@ static int do_one_ref(const char *base, each_ref_fn fn, int trim,
282420

283421
/*
284422
* Call fn for each reference in dir that has index in the range
285-
* offset <= index < dir->nr. This function does not sort the dir;
286-
* sorting should be done by the caller.
423+
* offset <= index < dir->nr. Recurse into subdirectories that are in
424+
* that index range, sorting them before iterating. This function
425+
* does not sort dir itself; it should be sorted beforehand.
287426
*/
288427
static int do_for_each_ref_in_dir(struct ref_dir *dir, int offset,
289428
const char *base,
@@ -292,7 +431,15 @@ static int do_for_each_ref_in_dir(struct ref_dir *dir, int offset,
292431
int i;
293432
assert(dir->sorted == dir->nr);
294433
for (i = offset; i < dir->nr; i++) {
295-
int retval = do_one_ref(base, fn, trim, flags, cb_data, dir->entries[i]);
434+
struct ref_entry *entry = dir->entries[i];
435+
int retval;
436+
if (entry->flag & REF_DIR) {
437+
sort_ref_dir(&entry->u.subdir);
438+
retval = do_for_each_ref_in_dir(&entry->u.subdir, 0,
439+
base, fn, trim, flags, cb_data);
440+
} else {
441+
retval = do_one_ref(base, fn, trim, flags, cb_data, entry);
442+
}
296443
if (retval)
297444
return retval;
298445
}
@@ -301,9 +448,10 @@ static int do_for_each_ref_in_dir(struct ref_dir *dir, int offset,
301448

302449
/*
303450
* Call fn for each reference in the union of dir1 and dir2, in order
304-
* by refname. If an entry appears in both dir1 and dir2, then only
305-
* process the version that is in dir2. The input dirs must already
306-
* be sorted.
451+
* by refname. Recurse into subdirectories. If a value entry appears
452+
* in both dir1 and dir2, then only process the version that is in
453+
* dir2. The input dirs must already be sorted, but subdirs will be
454+
* sorted as needed.
307455
*/
308456
static int do_for_each_ref_in_dirs(struct ref_dir *dir1,
309457
struct ref_dir *dir2,
@@ -315,22 +463,55 @@ static int do_for_each_ref_in_dirs(struct ref_dir *dir1,
315463

316464
assert(dir1->sorted == dir1->nr);
317465
assert(dir2->sorted == dir2->nr);
318-
while (i1 < dir1->nr && i2 < dir2->nr) {
319-
struct ref_entry *e1 = dir1->entries[i1];
320-
struct ref_entry *e2 = dir2->entries[i2];
321-
int cmp = strcmp(e1->name, e2->name);
322-
if (cmp < 0) {
323-
retval = do_one_ref(base, fn, trim, flags, cb_data, e1);
324-
i1++;
466+
while (1) {
467+
struct ref_entry *e1, *e2;
468+
int cmp;
469+
if (i1 == dir1->nr) {
470+
return do_for_each_ref_in_dir(dir2, i2,
471+
base, fn, trim, flags, cb_data);
472+
}
473+
if (i2 == dir2->nr) {
474+
return do_for_each_ref_in_dir(dir1, i1,
475+
base, fn, trim, flags, cb_data);
476+
}
477+
e1 = dir1->entries[i1];
478+
e2 = dir2->entries[i2];
479+
cmp = strcmp(e1->name, e2->name);
480+
if (cmp == 0) {
481+
if ((e1->flag & REF_DIR) && (e2->flag & REF_DIR)) {
482+
/* Both are directories; descend them in parallel. */
483+
sort_ref_dir(&e1->u.subdir);
484+
sort_ref_dir(&e2->u.subdir);
485+
retval = do_for_each_ref_in_dirs(
486+
&e1->u.subdir, &e2->u.subdir,
487+
base, fn, trim, flags, cb_data);
488+
i1++;
489+
i2++;
490+
} else if (!(e1->flag & REF_DIR) && !(e2->flag & REF_DIR)) {
491+
/* Both are references; ignore the one from dir1. */
492+
retval = do_one_ref(base, fn, trim, flags, cb_data, e2);
493+
i1++;
494+
i2++;
495+
} else {
496+
die("conflict between reference and directory: %s",
497+
e1->name);
498+
}
325499
} else {
326-
retval = do_one_ref(base, fn, trim, flags, cb_data, e2);
327-
i2++;
328-
if (cmp == 0) {
329-
/*
330-
* There was a ref in array1 with the
331-
* same name; ignore it.
332-
*/
500+
struct ref_entry *e;
501+
if (cmp < 0) {
502+
e = e1;
333503
i1++;
504+
} else {
505+
e = e2;
506+
i2++;
507+
}
508+
if (e->flag & REF_DIR) {
509+
sort_ref_dir(&e->u.subdir);
510+
retval = do_for_each_ref_in_dir(
511+
&e->u.subdir, 0,
512+
base, fn, trim, flags, cb_data);
513+
} else {
514+
retval = do_one_ref(base, fn, trim, flags, cb_data, e);
334515
}
335516
}
336517
if (retval)
@@ -655,7 +836,7 @@ static int resolve_gitlink_packed_ref(struct ref_cache *refs,
655836
struct ref_entry *ref;
656837
struct ref_dir *dir = get_packed_refs(refs);
657838

658-
ref = search_ref_dir(dir, refname);
839+
ref = find_ref(dir, refname);
659840
if (ref == NULL)
660841
return -1;
661842

@@ -727,7 +908,7 @@ int resolve_gitlink_ref(const char *path, const char *refname, unsigned char *sh
727908
static int get_packed_ref(const char *refname, unsigned char *sha1)
728909
{
729910
struct ref_dir *packed = get_packed_refs(get_ref_cache(NULL));
730-
struct ref_entry *entry = search_ref_dir(packed, refname);
911+
struct ref_entry *entry = find_ref(packed, refname);
731912
if (entry) {
732913
hashcpy(sha1, entry->u.value.sha1);
733914
return 0;
@@ -904,7 +1085,7 @@ int peel_ref(const char *refname, unsigned char *sha1)
9041085

9051086
if ((flag & REF_ISPACKED)) {
9061087
struct ref_dir *dir = get_packed_refs(get_ref_cache(NULL));
907-
struct ref_entry *r = search_ref_dir(dir, refname);
1088+
struct ref_entry *r = find_ref(dir, refname);
9081089

9091090
if (r != NULL && r->flag & REF_KNOWS_PEELED) {
9101091
hashcpy(sha1, r->u.value.peeled);
@@ -1404,8 +1585,7 @@ static int repack_without_ref(const char *refname)
14041585
{
14051586
struct repack_without_ref_sb data;
14061587
struct ref_dir *packed = get_packed_refs(get_ref_cache(NULL));
1407-
sort_ref_dir(packed);
1408-
if (search_ref_dir(packed, refname) == NULL)
1588+
if (find_ref(packed, refname) == NULL)
14091589
return 0;
14101590
data.refname = refname;
14111591
data.fd = hold_lock_file_for_update(&packlock, git_path("packed-refs"), 0);

0 commit comments

Comments
 (0)