Skip to content

Commit 8c3c074

Browse files
committed
Merge tag 'xfs-5.7-merge-12' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux
Pull more xfs updates from Darrick Wong:

"As promised last week, this batch changes how xfs interacts with memory reclaim; how the log batches and throttles log items; how hard writes near ENOSPC will try to squeeze more space out of the filesystem; and hopefully fixes the last of the umount hangs after a catastrophic failure.

Summary:
- Validate the realtime geometry in the superblock when mounting
- Refactor a bunch of tricky flag handling in the log code
- Flush the CIL more judiciously so that we don't wait until there are millions of log items consuming a lot of memory.
- Throttle transaction commits to prevent the xfs frontend from flooding the CIL with too many log items.
- Account metadata buffers correctly for memory reclaim.
- Mark slabs properly for memory reclaim. These should help reclaim run more effectively when XFS is using a lot of memory.
- Don't write a garbage log record at unmount time if we're trying to trigger summary counter recalculation at next mount.
- Don't block the AIL on locked dquot/inode buffers; instead trigger its backoff mechanism to give the lock holder a chance to finish up.
- Ratelimit writeback flushing when buffered writes encounter ENOSPC.
- Other minor cleanups.
- Make reflink a synchronous operation when the fs is mounted with wsync or sync, which means that now we force the log to disk to record the changes"

* tag 'xfs-5.7-merge-12' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux: (26 commits)
  xfs: reflink should force the log out if mounted with wsync
  xfs: factor out a new xfs_log_force_inode helper
  xfs: fix inode number overflow in ifree cluster helper
  xfs: remove redundant variable assignment in xfs_symlink()
  xfs: ratelimit inode flush on buffered write ENOSPC
  xfs: return locked status of inode buffer on xfsaild push
  xfs: trylock underlying buffer on dquot flush
  xfs: remove unnecessary ternary from xfs_create
  xfs: don't write a corrupt unmount record to force summary counter recalc
  xfs: factor inode lookup from xfs_ifree_cluster
  xfs: tail updates only need to occur when LSN changes
  xfs: factor common AIL item deletion code
  xfs: correctly acount for reclaimable slabs
  xfs: Improve metadata buffer reclaim accountability
  xfs: don't allow log IO to be throttled
  xfs: Throttle commits on delayed background CIL push
  xfs: Lower CIL flush limit for large logs
  xfs: remove some stale comments from the log code
  xfs: refactor unmount record writing
  xfs: merge xlog_commit_record with xlog_write_done
  ...
2 parents d3e5e97 + 5833112 commit 8c3c074

21 files changed

+512
-451
lines changed

fs/xfs/libxfs/xfs_sb.c

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -328,6 +328,38 @@ xfs_validate_sb_common(
328328
return -EFSCORRUPTED;
329329
}
330330

331+
/* Validate the realtime geometry; stolen from xfs_repair */
332+
if (sbp->sb_rextsize * sbp->sb_blocksize > XFS_MAX_RTEXTSIZE ||
333+
sbp->sb_rextsize * sbp->sb_blocksize < XFS_MIN_RTEXTSIZE) {
334+
xfs_notice(mp,
335+
"realtime extent sanity check failed");
336+
return -EFSCORRUPTED;
337+
}
338+
339+
if (sbp->sb_rblocks == 0) {
340+
if (sbp->sb_rextents != 0 || sbp->sb_rbmblocks != 0 ||
341+
sbp->sb_rextslog != 0 || sbp->sb_frextents != 0) {
342+
xfs_notice(mp,
343+
"realtime zeroed geometry check failed");
344+
return -EFSCORRUPTED;
345+
}
346+
} else {
347+
uint64_t rexts;
348+
uint64_t rbmblocks;
349+
350+
rexts = div_u64(sbp->sb_rblocks, sbp->sb_rextsize);
351+
rbmblocks = howmany_64(sbp->sb_rextents,
352+
NBBY * sbp->sb_blocksize);
353+
354+
if (sbp->sb_rextents != rexts ||
355+
sbp->sb_rextslog != xfs_highbit32(sbp->sb_rextents) ||
356+
sbp->sb_rbmblocks != rbmblocks) {
357+
xfs_notice(mp,
358+
"realtime geometry sanity check failed");
359+
return -EFSCORRUPTED;
360+
}
361+
}
362+
331363
if (sbp->sb_unit) {
332364
if (!xfs_sb_version_hasdalign(sbp) ||
333365
sbp->sb_unit > sbp->sb_width ||

fs/xfs/xfs_buf.c

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -327,6 +327,9 @@ xfs_buf_free(
327327

328328
__free_page(page);
329329
}
330+
if (current->reclaim_state)
331+
current->reclaim_state->reclaimed_slab +=
332+
bp->b_page_count;
330333
} else if (bp->b_flags & _XBF_KMEM)
331334
kmem_free(bp->b_addr);
332335
_xfs_buf_free_pages(bp);
@@ -2114,9 +2117,11 @@ xfs_buf_delwri_pushbuf(
21142117
int __init
21152118
xfs_buf_init(void)
21162119
{
2117-
xfs_buf_zone = kmem_cache_create("xfs_buf",
2118-
sizeof(struct xfs_buf), 0,
2119-
SLAB_HWCACHE_ALIGN, NULL);
2120+
xfs_buf_zone = kmem_cache_create("xfs_buf", sizeof(struct xfs_buf), 0,
2121+
SLAB_HWCACHE_ALIGN |
2122+
SLAB_RECLAIM_ACCOUNT |
2123+
SLAB_MEM_SPREAD,
2124+
NULL);
21202125
if (!xfs_buf_zone)
21212126
goto out;
21222127

fs/xfs/xfs_dquot.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1105,8 +1105,8 @@ xfs_qm_dqflush(
11051105
* Get the buffer containing the on-disk dquot
11061106
*/
11071107
error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, dqp->q_blkno,
1108-
mp->m_quotainfo->qi_dqchunklen, 0, &bp,
1109-
&xfs_dquot_buf_ops);
1108+
mp->m_quotainfo->qi_dqchunklen, XBF_TRYLOCK,
1109+
&bp, &xfs_dquot_buf_ops);
11101110
if (error)
11111111
goto out_unlock;
11121112

@@ -1177,7 +1177,7 @@ xfs_qm_dqflush(
11771177

11781178
out_unlock:
11791179
xfs_dqfunlock(dqp);
1180-
return -EIO;
1180+
return error;
11811181
}
11821182

11831183
/*

fs/xfs/xfs_dquot_item.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -189,7 +189,8 @@ xfs_qm_dquot_logitem_push(
189189
if (!xfs_buf_delwri_queue(bp, buffer_list))
190190
rval = XFS_ITEM_FLUSHING;
191191
xfs_buf_relse(bp);
192-
}
192+
} else if (error == -EAGAIN)
193+
rval = XFS_ITEM_LOCKED;
193194

194195
spin_lock(&lip->li_ailp->ail_lock);
195196
out_unlock:

fs/xfs/xfs_export.c

Lines changed: 1 addition & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@
1515
#include "xfs_trans.h"
1616
#include "xfs_inode_item.h"
1717
#include "xfs_icache.h"
18-
#include "xfs_log.h"
1918
#include "xfs_pnfs.h"
2019

2120
/*
@@ -221,18 +220,7 @@ STATIC int
221220
xfs_fs_nfs_commit_metadata(
222221
struct inode *inode)
223222
{
224-
struct xfs_inode *ip = XFS_I(inode);
225-
struct xfs_mount *mp = ip->i_mount;
226-
xfs_lsn_t lsn = 0;
227-
228-
xfs_ilock(ip, XFS_ILOCK_SHARED);
229-
if (xfs_ipincount(ip))
230-
lsn = ip->i_itemp->ili_last_lsn;
231-
xfs_iunlock(ip, XFS_ILOCK_SHARED);
232-
233-
if (!lsn)
234-
return 0;
235-
return xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, NULL);
223+
return xfs_log_force_inode(XFS_I(inode));
236224
}
237225

238226
const struct export_operations xfs_export_operations = {

fs/xfs/xfs_file.c

Lines changed: 5 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -80,19 +80,9 @@ xfs_dir_fsync(
8080
int datasync)
8181
{
8282
struct xfs_inode *ip = XFS_I(file->f_mapping->host);
83-
struct xfs_mount *mp = ip->i_mount;
84-
xfs_lsn_t lsn = 0;
8583

8684
trace_xfs_dir_fsync(ip);
87-
88-
xfs_ilock(ip, XFS_ILOCK_SHARED);
89-
if (xfs_ipincount(ip))
90-
lsn = ip->i_itemp->ili_last_lsn;
91-
xfs_iunlock(ip, XFS_ILOCK_SHARED);
92-
93-
if (!lsn)
94-
return 0;
95-
return xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, NULL);
85+
return xfs_log_force_inode(ip);
9686
}
9787

9888
STATIC int
@@ -1069,7 +1059,11 @@ xfs_file_remap_range(
10691059

10701060
ret = xfs_reflink_update_dest(dest, pos_out + len, cowextsize,
10711061
remap_flags);
1062+
if (ret)
1063+
goto out_unlock;
10721064

1065+
if (mp->m_flags & XFS_MOUNT_WSYNC)
1066+
xfs_log_force_inode(dest);
10731067
out_unlock:
10741068
xfs_reflink_remap_unlock(file_in, file_out);
10751069
if (ret)

fs/xfs/xfs_inode.c

Lines changed: 104 additions & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -1200,8 +1200,7 @@ xfs_create(
12001200
unlock_dp_on_error = false;
12011201

12021202
error = xfs_dir_createname(tp, dp, name, ip->i_ino,
1203-
resblks ?
1204-
resblks - XFS_IALLOC_SPACE_RES(mp) : 0);
1203+
resblks - XFS_IALLOC_SPACE_RES(mp));
12051204
if (error) {
12061205
ASSERT(error != -ENOSPC);
12071206
goto out_trans_cancel;
@@ -2503,6 +2502,88 @@ xfs_iunlink_remove(
25032502
return error;
25042503
}
25052504

2505+
/*
2506+
* Look up the inode number specified and mark it stale if it is found. If it is
2507+
* dirty, return the inode so it can be attached to the cluster buffer so it can
2508+
* be processed appropriately when the cluster free transaction completes.
2509+
*/
2510+
static struct xfs_inode *
2511+
xfs_ifree_get_one_inode(
2512+
struct xfs_perag *pag,
2513+
struct xfs_inode *free_ip,
2514+
xfs_ino_t inum)
2515+
{
2516+
struct xfs_mount *mp = pag->pag_mount;
2517+
struct xfs_inode *ip;
2518+
2519+
retry:
2520+
rcu_read_lock();
2521+
ip = radix_tree_lookup(&pag->pag_ici_root, XFS_INO_TO_AGINO(mp, inum));
2522+
2523+
/* Inode not in memory, nothing to do */
2524+
if (!ip)
2525+
goto out_rcu_unlock;
2526+
2527+
/*
2528+
* Because this is an RCU-protected lookup, we could find a recently
2529+
* freed or even reallocated inode during the lookup. We need to check
2530+
* under the i_flags_lock for a valid inode here. Skip it if it is not
2531+
* valid, the wrong inode or stale.
2532+
*/
2533+
spin_lock(&ip->i_flags_lock);
2534+
if (ip->i_ino != inum || __xfs_iflags_test(ip, XFS_ISTALE)) {
2535+
spin_unlock(&ip->i_flags_lock);
2536+
goto out_rcu_unlock;
2537+
}
2538+
spin_unlock(&ip->i_flags_lock);
2539+
2540+
/*
2541+
* Don't try to lock/unlock the current inode, but we _cannot_ skip the
2542+
* other inodes that we did not find in the list attached to the buffer
2543+
* and are not already marked stale. If we can't lock it, back off and
2544+
* retry.
2545+
*/
2546+
if (ip != free_ip) {
2547+
if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) {
2548+
rcu_read_unlock();
2549+
delay(1);
2550+
goto retry;
2551+
}
2552+
2553+
/*
2554+
* Check the inode number again in case we're racing with
2555+
* freeing in xfs_reclaim_inode(). See the comments in that
2556+
* function for more information as to why the initial check is
2557+
* not sufficient.
2558+
*/
2559+
if (ip->i_ino != inum) {
2560+
xfs_iunlock(ip, XFS_ILOCK_EXCL);
2561+
goto out_rcu_unlock;
2562+
}
2563+
}
2564+
rcu_read_unlock();
2565+
2566+
xfs_iflock(ip);
2567+
xfs_iflags_set(ip, XFS_ISTALE);
2568+
2569+
/*
2570+
* We don't need to attach clean inodes or those only with unlogged
2571+
* changes (which we throw away, anyway).
2572+
*/
2573+
if (!ip->i_itemp || xfs_inode_clean(ip)) {
2574+
ASSERT(ip != free_ip);
2575+
xfs_ifunlock(ip);
2576+
xfs_iunlock(ip, XFS_ILOCK_EXCL);
2577+
goto out_no_inode;
2578+
}
2579+
return ip;
2580+
2581+
out_rcu_unlock:
2582+
rcu_read_unlock();
2583+
out_no_inode:
2584+
return NULL;
2585+
}
2586+
25062587
/*
25072588
* A big issue when freeing the inode cluster is that we _cannot_ skip any
25082589
* inodes that are in memory - they all must be marked stale and attached to
@@ -2603,77 +2684,11 @@ xfs_ifree_cluster(
26032684
* even trying to lock them.
26042685
*/
26052686
for (i = 0; i < igeo->inodes_per_cluster; i++) {
2606-
retry:
2607-
rcu_read_lock();
2608-
ip = radix_tree_lookup(&pag->pag_ici_root,
2609-
XFS_INO_TO_AGINO(mp, (inum + i)));
2610-
2611-
/* Inode not in memory, nothing to do */
2612-
if (!ip) {
2613-
rcu_read_unlock();
2687+
ip = xfs_ifree_get_one_inode(pag, free_ip, inum + i);
2688+
if (!ip)
26142689
continue;
2615-
}
2616-
2617-
/*
2618-
* because this is an RCU protected lookup, we could
2619-
* find a recently freed or even reallocated inode
2620-
* during the lookup. We need to check under the
2621-
* i_flags_lock for a valid inode here. Skip it if it
2622-
* is not valid, the wrong inode or stale.
2623-
*/
2624-
spin_lock(&ip->i_flags_lock);
2625-
if (ip->i_ino != inum + i ||
2626-
__xfs_iflags_test(ip, XFS_ISTALE)) {
2627-
spin_unlock(&ip->i_flags_lock);
2628-
rcu_read_unlock();
2629-
continue;
2630-
}
2631-
spin_unlock(&ip->i_flags_lock);
2632-
2633-
/*
2634-
* Don't try to lock/unlock the current inode, but we
2635-
* _cannot_ skip the other inodes that we did not find
2636-
* in the list attached to the buffer and are not
2637-
* already marked stale. If we can't lock it, back off
2638-
* and retry.
2639-
*/
2640-
if (ip != free_ip) {
2641-
if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) {
2642-
rcu_read_unlock();
2643-
delay(1);
2644-
goto retry;
2645-
}
2646-
2647-
/*
2648-
* Check the inode number again in case we're
2649-
* racing with freeing in xfs_reclaim_inode().
2650-
* See the comments in that function for more
2651-
* information as to why the initial check is
2652-
* not sufficient.
2653-
*/
2654-
if (ip->i_ino != inum + i) {
2655-
xfs_iunlock(ip, XFS_ILOCK_EXCL);
2656-
rcu_read_unlock();
2657-
continue;
2658-
}
2659-
}
2660-
rcu_read_unlock();
2661-
2662-
xfs_iflock(ip);
2663-
xfs_iflags_set(ip, XFS_ISTALE);
26642690

2665-
/*
2666-
* we don't need to attach clean inodes or those only
2667-
* with unlogged changes (which we throw away, anyway).
2668-
*/
26692691
iip = ip->i_itemp;
2670-
if (!iip || xfs_inode_clean(ip)) {
2671-
ASSERT(ip != free_ip);
2672-
xfs_ifunlock(ip);
2673-
xfs_iunlock(ip, XFS_ILOCK_EXCL);
2674-
continue;
2675-
}
2676-
26772692
iip->ili_last_fields = iip->ili_fields;
26782693
iip->ili_fields = 0;
26792694
iip->ili_fsync_fields = 0;
@@ -3930,3 +3945,22 @@ xfs_irele(
39303945
trace_xfs_irele(ip, _RET_IP_);
39313946
iput(VFS_I(ip));
39323947
}
3948+
3949+
/*
3950+
* Ensure all committed transactions touching the inode are written to the log.
3951+
*/
3952+
int
3953+
xfs_log_force_inode(
3954+
struct xfs_inode *ip)
3955+
{
3956+
xfs_lsn_t lsn = 0;
3957+
3958+
xfs_ilock(ip, XFS_ILOCK_SHARED);
3959+
if (xfs_ipincount(ip))
3960+
lsn = ip->i_itemp->ili_last_lsn;
3961+
xfs_iunlock(ip, XFS_ILOCK_SHARED);
3962+
3963+
if (!lsn)
3964+
return 0;
3965+
return xfs_log_force_lsn(ip->i_mount, lsn, XFS_LOG_SYNC, NULL);
3966+
}

fs/xfs/xfs_inode.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -426,6 +426,7 @@ int xfs_itruncate_extents_flags(struct xfs_trans **,
426426
struct xfs_inode *, int, xfs_fsize_t, int);
427427
void xfs_iext_realloc(xfs_inode_t *, int, int);
428428

429+
int xfs_log_force_inode(struct xfs_inode *ip);
429430
void xfs_iunpin_wait(xfs_inode_t *);
430431
#define xfs_ipincount(ip) ((unsigned int) atomic_read(&ip->i_pincount))
431432

0 commit comments

Comments
 (0)