Skip to content

Commit f0f7a67

Browse files
committed
xfs: move inode flush to the sync workqueue
Move the inode dirty data flushing to a workqueue so that multiple threads can take advantage of a single thread's flushing work. The ratelimiting technique used in bdd4ee4 was not successful, because threads that skipped the inode flush scan due to ratelimiting would ENOSPC early, which caused occasional (but noticeable) changes in behavior and sporadic fstest regressions. Therefore, make all the writer threads wait on a single inode flush, which eliminates both the stampeding hordes of flushers and the small window in which a write could fail with ENOSPC because it lost the ratelimit race even after another thread freed space. Fixes: c642570 ("xfs: ratelimit inode flush on buffered write ENOSPC") Signed-off-by: Darrick J. Wong <[email protected]> Reviewed-by: Brian Foster <[email protected]>
1 parent c142932 commit f0f7a67

File tree

2 files changed

+27
-19
lines changed

2 files changed

+27
-19
lines changed

fs/xfs/xfs_mount.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -167,8 +167,12 @@ typedef struct xfs_mount {
167167
struct xfs_kobj m_error_meta_kobj;
168168
struct xfs_error_cfg m_error_cfg[XFS_ERR_CLASS_MAX][XFS_ERR_ERRNO_MAX];
169169
struct xstats m_stats; /* per-fs stats */
170-
struct ratelimit_state m_flush_inodes_ratelimit;
171170

171+
/*
172+
* Workqueue item so that we can coalesce multiple inode flush attempts
173+
* into a single flush.
174+
*/
175+
struct work_struct m_flush_inodes_work;
172176
struct workqueue_struct *m_buf_workqueue;
173177
struct workqueue_struct *m_unwritten_workqueue;
174178
struct workqueue_struct *m_cil_workqueue;

fs/xfs/xfs_super.c

Lines changed: 22 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -516,6 +516,20 @@ xfs_destroy_mount_workqueues(
516516
destroy_workqueue(mp->m_buf_workqueue);
517517
}
518518

519+
static void
520+
xfs_flush_inodes_worker(
521+
struct work_struct *work)
522+
{
523+
struct xfs_mount *mp = container_of(work, struct xfs_mount,
524+
m_flush_inodes_work);
525+
struct super_block *sb = mp->m_super;
526+
527+
if (down_read_trylock(&sb->s_umount)) {
528+
sync_inodes_sb(sb);
529+
up_read(&sb->s_umount);
530+
}
531+
}
532+
519533
/*
520534
* Flush all dirty data to disk. Must not be called while holding an XFS_ILOCK
521535
* or a page lock. We use sync_inodes_sb() here to ensure we block while waiting
@@ -526,15 +540,15 @@ void
526540
xfs_flush_inodes(
527541
struct xfs_mount *mp)
528542
{
529-
struct super_block *sb = mp->m_super;
530-
531-
if (!__ratelimit(&mp->m_flush_inodes_ratelimit))
543+
/*
544+
* If flush_work() returns true then that means we waited for a flush
545+
* which was already in progress. Don't bother running another scan.
546+
*/
547+
if (flush_work(&mp->m_flush_inodes_work))
532548
return;
533549

534-
if (down_read_trylock(&sb->s_umount)) {
535-
sync_inodes_sb(sb);
536-
up_read(&sb->s_umount);
537-
}
550+
queue_work(mp->m_sync_workqueue, &mp->m_flush_inodes_work);
551+
flush_work(&mp->m_flush_inodes_work);
538552
}
539553

540554
/* Catch misguided souls that try to use this interface on XFS */
@@ -1369,17 +1383,6 @@ xfs_fc_fill_super(
13691383
if (error)
13701384
goto out_free_names;
13711385

1372-
/*
1373-
* Cap the number of invocations of xfs_flush_inodes to 16 for every
1374-
* quarter of a second. The magic numbers here were determined by
1375-
* observation neither to cause stalls in writeback when there are a
1376-
* lot of IO threads and the fs is near ENOSPC, nor cause any fstest
1377-
* regressions. YMMV.
1378-
*/
1379-
ratelimit_state_init(&mp->m_flush_inodes_ratelimit, HZ / 4, 16);
1380-
ratelimit_set_flags(&mp->m_flush_inodes_ratelimit,
1381-
RATELIMIT_MSG_ON_RELEASE);
1382-
13831386
error = xfs_init_mount_workqueues(mp);
13841387
if (error)
13851388
goto out_close_devices;
@@ -1752,6 +1755,7 @@ static int xfs_init_fs_context(
17521755
spin_lock_init(&mp->m_perag_lock);
17531756
mutex_init(&mp->m_growlock);
17541757
atomic_set(&mp->m_active_trans, 0);
1758+
INIT_WORK(&mp->m_flush_inodes_work, xfs_flush_inodes_worker);
17551759
INIT_DELAYED_WORK(&mp->m_reclaim_work, xfs_reclaim_worker);
17561760
INIT_DELAYED_WORK(&mp->m_eofblocks_work, xfs_eofblocks_worker);
17571761
INIT_DELAYED_WORK(&mp->m_cowblocks_work, xfs_cowblocks_worker);

0 commit comments

Comments
 (0)