Skip to content

Commit 7e587c2

Browse files
committed
Merge tag 'vfs-6.14-rc1.libfs' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs
Pull vfs libfs updates from Christian Brauner:
 "This improves the stable directory offset behavior in various ways.
  Stable offsets are needed so that NFS can reliably read directories on
  filesystems such as tmpfs:

   - Improve the end-of-directory detection

     According to getdents(3), the d_off field in each returned
     directory entry points to the next entry in the directory. The
     d_off field in the last returned entry in the readdir buffer must
     contain a valid offset value, but if it points to an actual
     directory entry, then readdir/getdents can loop.

     Introduce a specific fixed offset value that is placed in the d_off
     field of the last entry in a directory. Some user space
     applications assume that the EOD offset value is larger than the
     offsets of real directory entries, so the largest valid offset
     value is reserved for this purpose. This new value is never
     allocated by simple_offset_add().

     When ->iterate_dir() returns, getdents{64} inserts the ctx->pos
     value into the d_off field of the last valid entry in the readdir
     buffer. When it hits EOD, offset_readdir() sets ctx->pos to the EOD
     offset value so the last entry is updated to point to the EOD
     marker.

     When trying to read the entry at the EOD offset, offset_readdir()
     terminates immediately.

   - Rely on d_children to iterate stable offset directories

     Instead of using the mtree to emit entries in the order of their
     offset values, use it only to map incoming ctx->pos to a starting
     entry. Then use the directory's d_children list, which is already
     maintained properly by the dcache, to find the next child to emit.

   - Narrow the range of directory offset values returned by
     simple_offset_add() to 3 .. (S32_MAX - 1) on all platforms. This
     means the allocation behavior is identical on 32-bit systems,
     64-bit systems, and 32-bit user space on 64-bit kernels. The new
     range still permits over 2 billion concurrent entries per
     directory.

   - Return ENOSPC when the directory offset range is exhausted. Hitting
     this error is almost impossible though.

   - Remove the simple_offset_empty() helper"

* tag 'vfs-6.14-rc1.libfs' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs:
  libfs: Use d_children list to iterate simple_offset directories
  libfs: Replace simple_offset end-of-directory detection
  Revert "libfs: fix infinite directory reads for offset dir"
  Revert "libfs: Add simple_offset_empty()"
  libfs: Return ENOSPC when the directory offset range is exhausted
2 parents 100ceb4 + a0634b4 commit 7e587c2

File tree

3 files changed: 79 additions (+79) and 88 deletions (-88).

fs/libfs.c

Lines changed: 77 additions & 85 deletions
Original file line numberDiff line numberDiff line change
@@ -245,9 +245,16 @@ const struct inode_operations simple_dir_inode_operations = {
245245
};
246246
EXPORT_SYMBOL(simple_dir_inode_operations);
247247

248-
/* 0 is '.', 1 is '..', so always start with offset 2 or more */
248+
/* simple_offset_add() never assigns these to a dentry */
249249
enum {
250-
DIR_OFFSET_MIN = 2,
250+
DIR_OFFSET_FIRST = 2, /* Find first real entry */
251+
DIR_OFFSET_EOD = S32_MAX,
252+
};
253+
254+
/* simple_offset_add() allocation range */
255+
enum {
256+
DIR_OFFSET_MIN = DIR_OFFSET_FIRST + 1,
257+
DIR_OFFSET_MAX = DIR_OFFSET_EOD - 1,
251258
};
252259

253260
static void offset_set(struct dentry *dentry, long offset)
@@ -291,9 +298,10 @@ int simple_offset_add(struct offset_ctx *octx, struct dentry *dentry)
291298
return -EBUSY;
292299

293300
ret = mtree_alloc_cyclic(&octx->mt, &offset, dentry, DIR_OFFSET_MIN,
294-
LONG_MAX, &octx->next_offset, GFP_KERNEL);
295-
if (ret < 0)
296-
return ret;
301+
DIR_OFFSET_MAX, &octx->next_offset,
302+
GFP_KERNEL);
303+
if (unlikely(ret < 0))
304+
return ret == -EBUSY ? -ENOSPC : ret;
297305

298306
offset_set(dentry, offset);
299307
return 0;
@@ -329,38 +337,6 @@ void simple_offset_remove(struct offset_ctx *octx, struct dentry *dentry)
329337
offset_set(dentry, 0);
330338
}
331339

332-
/**
333-
* simple_offset_empty - Check if a dentry can be unlinked
334-
* @dentry: dentry to be tested
335-
*
336-
* Returns 0 if @dentry is a non-empty directory; otherwise returns 1.
337-
*/
338-
int simple_offset_empty(struct dentry *dentry)
339-
{
340-
struct inode *inode = d_inode(dentry);
341-
struct offset_ctx *octx;
342-
struct dentry *child;
343-
unsigned long index;
344-
int ret = 1;
345-
346-
if (!inode || !S_ISDIR(inode->i_mode))
347-
return ret;
348-
349-
index = DIR_OFFSET_MIN;
350-
octx = inode->i_op->get_offset_ctx(inode);
351-
mt_for_each(&octx->mt, child, index, LONG_MAX) {
352-
spin_lock(&child->d_lock);
353-
if (simple_positive(child)) {
354-
spin_unlock(&child->d_lock);
355-
ret = 0;
356-
break;
357-
}
358-
spin_unlock(&child->d_lock);
359-
}
360-
361-
return ret;
362-
}
363-
364340
/**
365341
* simple_offset_rename - handle directory offsets for rename
366342
* @old_dir: parent directory of source entry
@@ -454,14 +430,6 @@ void simple_offset_destroy(struct offset_ctx *octx)
454430
mtree_destroy(&octx->mt);
455431
}
456432

457-
static int offset_dir_open(struct inode *inode, struct file *file)
458-
{
459-
struct offset_ctx *ctx = inode->i_op->get_offset_ctx(inode);
460-
461-
file->private_data = (void *)ctx->next_offset;
462-
return 0;
463-
}
464-
465433
/**
466434
* offset_dir_llseek - Advance the read position of a directory descriptor
467435
* @file: an open directory whose position is to be updated
@@ -475,9 +443,6 @@ static int offset_dir_open(struct inode *inode, struct file *file)
475443
*/
476444
static loff_t offset_dir_llseek(struct file *file, loff_t offset, int whence)
477445
{
478-
struct inode *inode = file->f_inode;
479-
struct offset_ctx *ctx = inode->i_op->get_offset_ctx(inode);
480-
481446
switch (whence) {
482447
case SEEK_CUR:
483448
offset += file->f_pos;
@@ -490,62 +455,89 @@ static loff_t offset_dir_llseek(struct file *file, loff_t offset, int whence)
490455
return -EINVAL;
491456
}
492457

493-
/* In this case, ->private_data is protected by f_pos_lock */
494-
if (!offset)
495-
file->private_data = (void *)ctx->next_offset;
496458
return vfs_setpos(file, offset, LONG_MAX);
497459
}
498460

499-
static struct dentry *offset_find_next(struct offset_ctx *octx, loff_t offset)
461+
static struct dentry *find_positive_dentry(struct dentry *parent,
462+
struct dentry *dentry,
463+
bool next)
500464
{
501-
MA_STATE(mas, &octx->mt, offset, offset);
465+
struct dentry *found = NULL;
466+
467+
spin_lock(&parent->d_lock);
468+
if (next)
469+
dentry = d_next_sibling(dentry);
470+
else if (!dentry)
471+
dentry = d_first_child(parent);
472+
hlist_for_each_entry_from(dentry, d_sib) {
473+
if (!simple_positive(dentry))
474+
continue;
475+
spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
476+
if (simple_positive(dentry))
477+
found = dget_dlock(dentry);
478+
spin_unlock(&dentry->d_lock);
479+
if (likely(found))
480+
break;
481+
}
482+
spin_unlock(&parent->d_lock);
483+
return found;
484+
}
485+
486+
static noinline_for_stack struct dentry *
487+
offset_dir_lookup(struct dentry *parent, loff_t offset)
488+
{
489+
struct inode *inode = d_inode(parent);
490+
struct offset_ctx *octx = inode->i_op->get_offset_ctx(inode);
502491
struct dentry *child, *found = NULL;
503492

504-
rcu_read_lock();
505-
child = mas_find(&mas, LONG_MAX);
506-
if (!child)
507-
goto out;
508-
spin_lock(&child->d_lock);
509-
if (simple_positive(child))
510-
found = dget_dlock(child);
511-
spin_unlock(&child->d_lock);
512-
out:
513-
rcu_read_unlock();
493+
MA_STATE(mas, &octx->mt, offset, offset);
494+
495+
if (offset == DIR_OFFSET_FIRST)
496+
found = find_positive_dentry(parent, NULL, false);
497+
else {
498+
rcu_read_lock();
499+
child = mas_find(&mas, DIR_OFFSET_MAX);
500+
found = find_positive_dentry(parent, child, false);
501+
rcu_read_unlock();
502+
}
514503
return found;
515504
}
516505

517506
static bool offset_dir_emit(struct dir_context *ctx, struct dentry *dentry)
518507
{
519508
struct inode *inode = d_inode(dentry);
520-
long offset = dentry2offset(dentry);
521509

522-
return ctx->actor(ctx, dentry->d_name.name, dentry->d_name.len, offset,
523-
inode->i_ino, fs_umode_to_dtype(inode->i_mode));
510+
return dir_emit(ctx, dentry->d_name.name, dentry->d_name.len,
511+
inode->i_ino, fs_umode_to_dtype(inode->i_mode));
524512
}
525513

526-
static void offset_iterate_dir(struct inode *inode, struct dir_context *ctx, long last_index)
514+
static void offset_iterate_dir(struct file *file, struct dir_context *ctx)
527515
{
528-
struct offset_ctx *octx = inode->i_op->get_offset_ctx(inode);
516+
struct dentry *dir = file->f_path.dentry;
529517
struct dentry *dentry;
530518

519+
dentry = offset_dir_lookup(dir, ctx->pos);
520+
if (!dentry)
521+
goto out_eod;
531522
while (true) {
532-
dentry = offset_find_next(octx, ctx->pos);
533-
if (!dentry)
534-
return;
535-
536-
if (dentry2offset(dentry) >= last_index) {
537-
dput(dentry);
538-
return;
539-
}
523+
struct dentry *next;
540524

541-
if (!offset_dir_emit(ctx, dentry)) {
542-
dput(dentry);
543-
return;
544-
}
525+
ctx->pos = dentry2offset(dentry);
526+
if (!offset_dir_emit(ctx, dentry))
527+
break;
545528

546-
ctx->pos = dentry2offset(dentry) + 1;
529+
next = find_positive_dentry(dir, dentry, true);
547530
dput(dentry);
531+
532+
if (!next)
533+
goto out_eod;
534+
dentry = next;
548535
}
536+
dput(dentry);
537+
return;
538+
539+
out_eod:
540+
ctx->pos = DIR_OFFSET_EOD;
549541
}
550542

551543
/**
@@ -565,26 +557,26 @@ static void offset_iterate_dir(struct inode *inode, struct dir_context *ctx, lon
565557
*
566558
* On return, @ctx->pos contains an offset that will read the next entry
567559
* in this directory when offset_readdir() is called again with @ctx.
560+
* Caller places this value in the d_off field of the last entry in the
561+
* user's buffer.
568562
*
569563
* Return values:
570564
* %0 - Complete
571565
*/
572566
static int offset_readdir(struct file *file, struct dir_context *ctx)
573567
{
574568
struct dentry *dir = file->f_path.dentry;
575-
long last_index = (long)file->private_data;
576569

577570
lockdep_assert_held(&d_inode(dir)->i_rwsem);
578571

579572
if (!dir_emit_dots(file, ctx))
580573
return 0;
581-
582-
offset_iterate_dir(d_inode(dir), ctx, last_index);
574+
if (ctx->pos != DIR_OFFSET_EOD)
575+
offset_iterate_dir(file, ctx);
583576
return 0;
584577
}
585578

586579
const struct file_operations simple_offset_dir_operations = {
587-
.open = offset_dir_open,
588580
.llseek = offset_dir_llseek,
589581
.iterate_shared = offset_readdir,
590582
.read = generic_read_dir,

include/linux/fs.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3483,7 +3483,6 @@ struct offset_ctx {
34833483
void simple_offset_init(struct offset_ctx *octx);
34843484
int simple_offset_add(struct offset_ctx *octx, struct dentry *dentry);
34853485
void simple_offset_remove(struct offset_ctx *octx, struct dentry *dentry);
3486-
int simple_offset_empty(struct dentry *dentry);
34873486
int simple_offset_rename(struct inode *old_dir, struct dentry *old_dentry,
34883487
struct inode *new_dir, struct dentry *new_dentry);
34893488
int simple_offset_rename_exchange(struct inode *old_dir,

mm/shmem.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3821,7 +3821,7 @@ static int shmem_unlink(struct inode *dir, struct dentry *dentry)
38213821

38223822
static int shmem_rmdir(struct inode *dir, struct dentry *dentry)
38233823
{
3824-
if (!simple_offset_empty(dentry))
3824+
if (!simple_empty(dentry))
38253825
return -ENOTEMPTY;
38263826

38273827
drop_nlink(d_inode(dentry));
@@ -3878,7 +3878,7 @@ static int shmem_rename2(struct mnt_idmap *idmap,
38783878
return simple_offset_rename_exchange(old_dir, old_dentry,
38793879
new_dir, new_dentry);
38803880

3881-
if (!simple_offset_empty(new_dentry))
3881+
if (!simple_empty(new_dentry))
38823882
return -ENOTEMPTY;
38833883

38843884
if (flags & RENAME_WHITEOUT) {

0 commit comments

Comments
 (0)