Skip to content

Commit de34f26

Browse files
pks-t authored and gitster committed
dir-iterator: support iteration in sorted order
The `struct dir_iterator` is a helper that allows us to iterate through directory entries. This iterator returns entries in the exact same order as readdir(3P) does -- or in other words, it guarantees no specific order at all.

This is about to become problematic as we are introducing a new reflog subcommand to list reflogs. As the "files" backend uses the directory iterator to enumerate reflogs, returning reflog names and exposing them to the user would inherit the nondeterministic ordering. Naturally, it would make for a terrible user interface to show a list with no discernible order.

While this could be handled at a higher level by the new subcommand itself by collecting and ordering the reflogs, this would be inefficient because we would first have to collect all reflogs before we can sort them, which would introduce additional latency when there are many reflogs.

Instead, introduce a new option into the directory iterator that asks for its entries to be yielded in lexicographical order. If set, the iterator will read all directory entries greedily and sort them before we start to iterate over them.

While this will of course also incur overhead as we cannot yield the directory entries immediately, it should at least be more efficient than having to sort the complete list of reflogs as we only need to sort one directory at a time.

This functionality will be used in a follow-up commit.

Signed-off-by: Patrick Steinhardt <[email protected]>
Signed-off-by: Junio C Hamano <[email protected]>
1 parent 0218de2 commit de34f26

File tree

2 files changed

+89
-13
lines changed

2 files changed

+89
-13
lines changed

dir-iterator.c

Lines changed: 86 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,19 @@
22
#include "dir.h"
33
#include "iterator.h"
44
#include "dir-iterator.h"
5+
#include "string-list.h"
56

67
struct dir_iterator_level {
78
DIR *dir;
89

10+
/*
11+
* The directory entries of the current level. This list will only be
12+
* populated when the iterator is ordered. In that case, `dir` will be
13+
* set to `NULL`.
14+
*/
15+
struct string_list entries;
16+
size_t entries_idx;
17+
918
/*
1019
* The length of the directory part of path at this level
1120
* (including a trailing '/'):
@@ -43,6 +52,31 @@ struct dir_iterator_int {
4352
unsigned int flags;
4453
};
4554

55+
/*
 * Read the next entry of `dir` that is neither "." nor "..".
 *
 * `path` is only used to name the directory in the warning that is
 * emitted when readdir(3) fails.
 *
 * Returns 0 and stores the entry in `*out` on success, 1 once the
 * directory is exhausted, and -1 if readdir(3) reported an error. In the
 * error case `errno` holds the value set by readdir(3) so that callers
 * can inspect it. `*out` is left untouched unless 0 is returned.
 */
static int next_directory_entry(DIR *dir, const char *path,
				struct dirent **out)
{
	struct dirent *de;

	do {
		/*
		 * readdir(3) returns NULL both at end-of-directory and on
		 * error; only the latter sets errno, so reset it first.
		 */
		errno = 0;
		de = readdir(dir);
		if (!de) {
			int saved_errno = errno;

			if (!saved_errno)
				return 1;

			/*
			 * Emitting the warning may itself change errno, but
			 * callers look at errno after we return -1.
			 */
			warning_errno("error reading directory '%s'", path);
			errno = saved_errno;
			return -1;
		}
	} while (is_dot_or_dotdot(de->d_name));

	*out = de;
	return 0;
}
79+
4680
/*
4781
* Push a level in the iter stack and initialize it with information from
4882
* the directory pointed by iter->base->path. It is assumed that this
@@ -72,6 +106,35 @@ static int push_level(struct dir_iterator_int *iter)
72106
return -1;
73107
}
74108

109+
string_list_init_dup(&level->entries);
110+
level->entries_idx = 0;
111+
112+
/*
113+
* When the iterator is sorted we read and sort all directory entries
114+
* directly.
115+
*/
116+
if (iter->flags & DIR_ITERATOR_SORTED) {
117+
struct dirent *de;
118+
119+
while (1) {
120+
int ret = next_directory_entry(level->dir, iter->base.path.buf, &de);
121+
if (ret < 0) {
122+
if (errno != ENOENT &&
123+
iter->flags & DIR_ITERATOR_PEDANTIC)
124+
return -1;
125+
continue;
126+
} else if (ret > 0) {
127+
break;
128+
}
129+
130+
string_list_append(&level->entries, de->d_name);
131+
}
132+
string_list_sort(&level->entries);
133+
134+
closedir(level->dir);
135+
level->dir = NULL;
136+
}
137+
75138
return 0;
76139
}
77140

@@ -88,6 +151,7 @@ static int pop_level(struct dir_iterator_int *iter)
88151
warning_errno("error closing directory '%s'",
89152
iter->base.path.buf);
90153
level->dir = NULL;
154+
string_list_clear(&level->entries, 0);
91155

92156
return --iter->levels_nr;
93157
}
@@ -139,27 +203,34 @@ int dir_iterator_advance(struct dir_iterator *dir_iterator)
139203
struct dirent *de;
140204
struct dir_iterator_level *level =
141205
&iter->levels[iter->levels_nr - 1];
206+
const char *name;
142207

143208
strbuf_setlen(&iter->base.path, level->prefix_len);
144-
errno = 0;
145-
de = readdir(level->dir);
146209

147-
if (!de) {
148-
if (errno) {
149-
warning_errno("error reading directory '%s'",
150-
iter->base.path.buf);
210+
if (level->dir) {
211+
int ret = next_directory_entry(level->dir, iter->base.path.buf, &de);
212+
if (ret < 0) {
151213
if (iter->flags & DIR_ITERATOR_PEDANTIC)
152214
goto error_out;
153-
} else if (pop_level(iter) == 0) {
154-
return dir_iterator_abort(dir_iterator);
215+
continue;
216+
} else if (ret > 0) {
217+
if (pop_level(iter) == 0)
218+
return dir_iterator_abort(dir_iterator);
219+
continue;
155220
}
156-
continue;
157-
}
158221

159-
if (is_dot_or_dotdot(de->d_name))
160-
continue;
222+
name = de->d_name;
223+
} else {
224+
if (level->entries_idx >= level->entries.nr) {
225+
if (pop_level(iter) == 0)
226+
return dir_iterator_abort(dir_iterator);
227+
continue;
228+
}
161229

162-
if (prepare_next_entry_data(iter, de->d_name)) {
230+
name = level->entries.items[level->entries_idx++].string;
231+
}
232+
233+
if (prepare_next_entry_data(iter, name)) {
163234
if (errno != ENOENT && iter->flags & DIR_ITERATOR_PEDANTIC)
164235
goto error_out;
165236
continue;
@@ -188,6 +259,8 @@ int dir_iterator_abort(struct dir_iterator *dir_iterator)
188259
warning_errno("error closing directory '%s'",
189260
iter->base.path.buf);
190261
}
262+
263+
string_list_clear(&level->entries, 0);
191264
}
192265

193266
free(iter->levels);

dir-iterator.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,8 +54,11 @@
5454
* and ITER_ERROR is returned immediately. In both cases, a meaningful
5555
* warning is emitted. Note: ENOENT errors are always ignored so that
5656
* the API users may remove files during iteration.
57+
*
58+
* - DIR_ITERATOR_SORTED: yield the entries of each directory in lexicographical order (each directory is read in full and sorted before its entries are returned).
5759
*/
5860
#define DIR_ITERATOR_PEDANTIC (1 << 0)
61+
#define DIR_ITERATOR_SORTED (1 << 1)
5962

6063
struct dir_iterator {
6164
/* The current path: */

0 commit comments

Comments
 (0)