Skip to content

Commit d4f4de5

Browse files
author
Al Viro
committed
Fix the locking in dcache_readdir() and friends
There are two problems in dcache_readdir() - one is that lockless traversal of the list needs non-trivial cooperation of d_alloc() (at least a switch to list_add_rcu(), and probably more than just that) and another is that it assumes that no removal will happen without the directory locked exclusive. Said assumption had always been there, never had been stated explicitly and is violated by several places in the kernel (devpts and selinuxfs). * replacement of next_positive() with different calling conventions: it returns struct list_head * instead of struct dentry *; the latter is passed in and out by reference, grabbing the result and dropping the original value. * scan is under ->d_lock. If we run out of timeslice, cursor is moved after the last position we'd reached and we reschedule; then the scan continues from that place. To avoid livelocks between multiple lseek() (with cursors getting moved past each other, never reaching the real entries) we always skip the cursors, need_resched() or not. * returned list_head * is either ->d_child of dentry we'd found or ->d_subdirs of parent (if we got to the end of the list). * dcache_readdir() and dcache_dir_lseek() switched to new helper. dcache_readdir() always holds a reference to dentry passed to dir_emit() now. Cursor is moved to just before the entry where dir_emit() has failed or into the very end of the list, if we'd run out. * move_cursor() eliminated - it had sucky calling conventions and after fixing that it became simply list_move() (in lseek and scan_positives) or list_move_tail() (in readdir). All operations with the list are under ->d_lock now, and we do not depend upon having all file removals done with parent locked exclusive anymore. Cc: [email protected] Reported-by: "zhengbin (A)" <[email protected]> Signed-off-by: Al Viro <[email protected]>
1 parent 4d856f7 commit d4f4de5

File tree

1 file changed

+69
-65
lines changed

1 file changed

+69
-65
lines changed

fs/libfs.c

Lines changed: 69 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -89,58 +89,47 @@ int dcache_dir_close(struct inode *inode, struct file *file)
8989
EXPORT_SYMBOL(dcache_dir_close);
9090

9191
/* parent is locked at least shared */
92-
static struct dentry *next_positive(struct dentry *parent,
93-
struct list_head *from,
94-
int count)
92+
/*
93+
* Returns an element of siblings' list.
94+
* We are looking for <count>th positive after <p>; if
95+
* found, dentry is grabbed and passed to caller via *<res>.
96+
* If no such element exists, the anchor of list is returned
97+
* and *<res> is set to NULL.
98+
*/
99+
static struct list_head *scan_positives(struct dentry *cursor,
100+
struct list_head *p,
101+
loff_t count,
102+
struct dentry **res)
95103
{
96-
unsigned *seq = &parent->d_inode->i_dir_seq, n;
97-
struct dentry *res;
98-
struct list_head *p;
99-
bool skipped;
100-
int i;
104+
struct dentry *dentry = cursor->d_parent, *found = NULL;
101105

102-
retry:
103-
i = count;
104-
skipped = false;
105-
n = smp_load_acquire(seq) & ~1;
106-
res = NULL;
107-
rcu_read_lock();
108-
for (p = from->next; p != &parent->d_subdirs; p = p->next) {
106+
spin_lock(&dentry->d_lock);
107+
while ((p = p->next) != &dentry->d_subdirs) {
109108
struct dentry *d = list_entry(p, struct dentry, d_child);
110-
if (!simple_positive(d)) {
111-
skipped = true;
112-
} else if (!--i) {
113-
res = d;
114-
break;
109+
// we must at least skip cursors, to avoid livelocks
110+
if (d->d_flags & DCACHE_DENTRY_CURSOR)
111+
continue;
112+
if (simple_positive(d) && !--count) {
113+
spin_lock_nested(&d->d_lock, DENTRY_D_LOCK_NESTED);
114+
if (simple_positive(d))
115+
found = dget_dlock(d);
116+
spin_unlock(&d->d_lock);
117+
if (likely(found))
118+
break;
119+
count = 1;
120+
}
121+
if (need_resched()) {
122+
list_move(&cursor->d_child, p);
123+
p = &cursor->d_child;
124+
spin_unlock(&dentry->d_lock);
125+
cond_resched();
126+
spin_lock(&dentry->d_lock);
115127
}
116128
}
117-
rcu_read_unlock();
118-
if (skipped) {
119-
smp_rmb();
120-
if (unlikely(*seq != n))
121-
goto retry;
122-
}
123-
return res;
124-
}
125-
126-
static void move_cursor(struct dentry *cursor, struct list_head *after)
127-
{
128-
struct dentry *parent = cursor->d_parent;
129-
unsigned n, *seq = &parent->d_inode->i_dir_seq;
130-
spin_lock(&parent->d_lock);
131-
for (;;) {
132-
n = *seq;
133-
if (!(n & 1) && cmpxchg(seq, n, n + 1) == n)
134-
break;
135-
cpu_relax();
136-
}
137-
__list_del(cursor->d_child.prev, cursor->d_child.next);
138-
if (after)
139-
list_add(&cursor->d_child, after);
140-
else
141-
list_add_tail(&cursor->d_child, &parent->d_subdirs);
142-
smp_store_release(seq, n + 2);
143-
spin_unlock(&parent->d_lock);
129+
spin_unlock(&dentry->d_lock);
130+
dput(*res);
131+
*res = found;
132+
return p;
144133
}
145134

146135
loff_t dcache_dir_lseek(struct file *file, loff_t offset, int whence)
@@ -158,17 +147,28 @@ loff_t dcache_dir_lseek(struct file *file, loff_t offset, int whence)
158147
return -EINVAL;
159148
}
160149
if (offset != file->f_pos) {
150+
struct dentry *cursor = file->private_data;
151+
struct dentry *to = NULL;
152+
struct list_head *p;
153+
161154
file->f_pos = offset;
162-
if (file->f_pos >= 2) {
163-
struct dentry *cursor = file->private_data;
164-
struct dentry *to;
165-
loff_t n = file->f_pos - 2;
166-
167-
inode_lock_shared(dentry->d_inode);
168-
to = next_positive(dentry, &dentry->d_subdirs, n);
169-
move_cursor(cursor, to ? &to->d_child : NULL);
170-
inode_unlock_shared(dentry->d_inode);
155+
inode_lock_shared(dentry->d_inode);
156+
157+
if (file->f_pos > 2) {
158+
p = scan_positives(cursor, &dentry->d_subdirs,
159+
file->f_pos - 2, &to);
160+
spin_lock(&dentry->d_lock);
161+
list_move(&cursor->d_child, p);
162+
spin_unlock(&dentry->d_lock);
163+
} else {
164+
spin_lock(&dentry->d_lock);
165+
list_del_init(&cursor->d_child);
166+
spin_unlock(&dentry->d_lock);
171167
}
168+
169+
dput(to);
170+
171+
inode_unlock_shared(dentry->d_inode);
172172
}
173173
return offset;
174174
}
@@ -190,25 +190,29 @@ int dcache_readdir(struct file *file, struct dir_context *ctx)
190190
{
191191
struct dentry *dentry = file->f_path.dentry;
192192
struct dentry *cursor = file->private_data;
193-
struct list_head *p = &cursor->d_child;
194-
struct dentry *next;
195-
bool moved = false;
193+
struct list_head *anchor = &dentry->d_subdirs;
194+
struct dentry *next = NULL;
195+
struct list_head *p;
196196

197197
if (!dir_emit_dots(file, ctx))
198198
return 0;
199199

200200
if (ctx->pos == 2)
201-
p = &dentry->d_subdirs;
202-
while ((next = next_positive(dentry, p, 1)) != NULL) {
201+
p = anchor;
202+
else
203+
p = &cursor->d_child;
204+
205+
while ((p = scan_positives(cursor, p, 1, &next)) != anchor) {
203206
if (!dir_emit(ctx, next->d_name.name, next->d_name.len,
204207
d_inode(next)->i_ino, dt_type(d_inode(next))))
205208
break;
206-
moved = true;
207-
p = &next->d_child;
208209
ctx->pos++;
209210
}
210-
if (moved)
211-
move_cursor(cursor, p);
211+
spin_lock(&dentry->d_lock);
212+
list_move_tail(&cursor->d_child, p);
213+
spin_unlock(&dentry->d_lock);
214+
dput(next);
215+
212216
return 0;
213217
}
214218
EXPORT_SYMBOL(dcache_readdir);

0 commit comments

Comments
 (0)