Skip to content

Commit acc8f86

Browse files
author
Darrick J. Wong
committed
xfs: attach dquot buffer to dquot log item buffer
Ever since 6.12-rc1, I've observed a pile of warnings from the kernel when running fstests with quotas enabled: WARNING: CPU: 1 PID: 458580 at mm/page_alloc.c:4221 __alloc_pages_noprof+0xc9c/0xf18 CPU: 1 UID: 0 PID: 458580 Comm: xfsaild/sda3 Tainted: G W 6.12.0-rc6-djwa #rc6 6ee3e0e531f6457e2d26aa008a3b65ff184b377c <snip> Call trace: __alloc_pages_noprof+0xc9c/0xf18 alloc_pages_mpol_noprof+0x94/0x240 alloc_pages_noprof+0x68/0xf8 new_slab+0x3e0/0x568 ___slab_alloc+0x5a0/0xb88 __slab_alloc.constprop.0+0x7c/0xf8 __kmalloc_noprof+0x404/0x4d0 xfs_buf_get_map+0x594/0xde0 [xfs 384cb02810558b4c490343c164e9407332118f88] xfs_buf_read_map+0x64/0x2e0 [xfs 384cb02810558b4c490343c164e9407332118f88] xfs_trans_read_buf_map+0x1dc/0x518 [xfs 384cb02810558b4c490343c164e9407332118f88] xfs_qm_dqflush+0xac/0x468 [xfs 384cb02810558b4c490343c164e9407332118f88] xfs_qm_dquot_logitem_push+0xe4/0x148 [xfs 384cb02810558b4c490343c164e9407332118f88] xfsaild+0x3f4/0xde8 [xfs 384cb02810558b4c490343c164e9407332118f88] kthread+0x110/0x128 ret_from_fork+0x10/0x20 ---[ end trace 0000000000000000 ]--- This corresponds to the line: WARN_ON_ONCE(current->flags & PF_MEMALLOC); within the NOFAIL checks. What's happening here is that the XFS AIL is trying to write a disk quota update back into the filesystem, but for that it needs to read the ondisk buffer for the dquot. The buffer is not in memory anymore, probably because it was evicted. Regardless, the buffer cache tries to allocate a new buffer, but those allocations are NOFAIL. The AIL thread has marked itself PF_MEMALLOC (aka noreclaim) since commit 43ff212 ("xfs: on-stack delayed write buffer lists") presumably because reclaim can push on XFS to push on the AIL. An easy way to fix this probably would have been to drop the NOFAIL flag from the xfs_buf allocation and open code a retry loop, but then there's still the problem that for bs>ps filesystems, the buffer itself could require up to 64k worth of pages. Inode items had similar behavior (multi-page cluster buffers that we don't want to allocate in the AIL) which we solved by making transaction precommit attach the inode cluster buffers to the dirty log item. Let's solve the dquot problem in the same way. So: Make a real precommit handler to read the dquot buffer and attach it to the log item; pass it to dqflush in the push method; and have the iodone function detach the buffer once we've flushed everything. Add a state flag to the log item to track when a thread has entered the precommit -> push mechanism to skip the detaching if it turns out that the dquot is very busy, as we don't hold the dquot lock between log item commit and AIL push). Reading and attaching the dquot buffer in the precommit hook is inspired by the work done for inode cluster buffers some time ago. Cc: <[email protected]> # v6.12 Fixes: 903edea ("mm: warn about illegal __GFP_NOFAIL usage in a more appropriate location and manner") Signed-off-by: "Darrick J. Wong" <[email protected]> Reviewed-by: Christoph Hellwig <[email protected]>
1 parent ec88b41 commit acc8f86

File tree

6 files changed

+169
-24
lines changed

6 files changed

+169
-24
lines changed

fs/xfs/xfs_dquot.c

Lines changed: 125 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,30 @@ xfs_dquot_mark_sick(
6868
}
6969
}
7070

71+
/*
72+
* Detach the dquot buffer if it's still attached, because we can get called
73+
* through dqpurge after a log shutdown. Caller must hold the dqflock or have
74+
* otherwise isolated the dquot.
75+
*/
76+
void
77+
xfs_dquot_detach_buf(
78+
struct xfs_dquot *dqp)
79+
{
80+
struct xfs_dq_logitem *qlip = &dqp->q_logitem;
81+
struct xfs_buf *bp = NULL;
82+
83+
spin_lock(&qlip->qli_lock);
84+
if (qlip->qli_item.li_buf) {
85+
bp = qlip->qli_item.li_buf;
86+
qlip->qli_item.li_buf = NULL;
87+
}
88+
spin_unlock(&qlip->qli_lock);
89+
if (bp) {
90+
list_del_init(&qlip->qli_item.li_bio_list);
91+
xfs_buf_rele(bp);
92+
}
93+
}
94+
7195
/*
7296
* This is called to free all the memory associated with a dquot
7397
*/
@@ -76,6 +100,7 @@ xfs_qm_dqdestroy(
76100
struct xfs_dquot *dqp)
77101
{
78102
ASSERT(list_empty(&dqp->q_lru));
103+
ASSERT(dqp->q_logitem.qli_item.li_buf == NULL);
79104

80105
kvfree(dqp->q_logitem.qli_item.li_lv_shadow);
81106
mutex_destroy(&dqp->q_qlock);
@@ -1146,6 +1171,7 @@ xfs_qm_dqflush_done(
11461171
container_of(lip, struct xfs_dq_logitem, qli_item);
11471172
struct xfs_dquot *dqp = qlip->qli_dquot;
11481173
struct xfs_ail *ailp = lip->li_ailp;
1174+
struct xfs_buf *bp = NULL;
11491175
xfs_lsn_t tail_lsn;
11501176

11511177
/*
@@ -1171,6 +1197,20 @@ xfs_qm_dqflush_done(
11711197
}
11721198
}
11731199

1200+
/*
1201+
* If this dquot hasn't been dirtied since initiating the last dqflush,
1202+
* release the buffer reference. We already unlinked this dquot item
1203+
* from the buffer.
1204+
*/
1205+
spin_lock(&qlip->qli_lock);
1206+
if (!qlip->qli_dirty) {
1207+
bp = lip->li_buf;
1208+
lip->li_buf = NULL;
1209+
}
1210+
spin_unlock(&qlip->qli_lock);
1211+
if (bp)
1212+
xfs_buf_rele(bp);
1213+
11741214
/*
11751215
* Release the dq's flush lock since we're done with it.
11761216
*/
@@ -1197,7 +1237,7 @@ xfs_buf_dquot_io_fail(
11971237

11981238
spin_lock(&bp->b_mount->m_ail->ail_lock);
11991239
list_for_each_entry(lip, &bp->b_li_list, li_bio_list)
1200-
xfs_set_li_failed(lip, bp);
1240+
set_bit(XFS_LI_FAILED, &lip->li_flags);
12011241
spin_unlock(&bp->b_mount->m_ail->ail_lock);
12021242
}
12031243

@@ -1249,14 +1289,15 @@ int
12491289
xfs_dquot_read_buf(
12501290
struct xfs_trans *tp,
12511291
struct xfs_dquot *dqp,
1292+
xfs_buf_flags_t xbf_flags,
12521293
struct xfs_buf **bpp)
12531294
{
12541295
struct xfs_mount *mp = dqp->q_mount;
12551296
struct xfs_buf *bp = NULL;
12561297
int error;
12571298

12581299
error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, dqp->q_blkno,
1259-
mp->m_quotainfo->qi_dqchunklen, XBF_TRYLOCK,
1300+
mp->m_quotainfo->qi_dqchunklen, xbf_flags,
12601301
&bp, &xfs_dquot_buf_ops);
12611302
if (error == -EAGAIN)
12621303
return error;
@@ -1275,6 +1316,77 @@ xfs_dquot_read_buf(
12751316
return error;
12761317
}
12771318

1319+
/*
1320+
* Attach a dquot buffer to this dquot to avoid allocating a buffer during a
1321+
* dqflush, since dqflush can be called from reclaim context.
1322+
*/
1323+
int
1324+
xfs_dquot_attach_buf(
1325+
struct xfs_trans *tp,
1326+
struct xfs_dquot *dqp)
1327+
{
1328+
struct xfs_dq_logitem *qlip = &dqp->q_logitem;
1329+
struct xfs_log_item *lip = &qlip->qli_item;
1330+
int error;
1331+
1332+
spin_lock(&qlip->qli_lock);
1333+
if (!lip->li_buf) {
1334+
struct xfs_buf *bp = NULL;
1335+
1336+
spin_unlock(&qlip->qli_lock);
1337+
error = xfs_dquot_read_buf(tp, dqp, 0, &bp);
1338+
if (error)
1339+
return error;
1340+
1341+
/*
1342+
* Attach the dquot to the buffer so that the AIL does not have
1343+
* to read the dquot buffer to push this item.
1344+
*/
1345+
xfs_buf_hold(bp);
1346+
spin_lock(&qlip->qli_lock);
1347+
lip->li_buf = bp;
1348+
xfs_trans_brelse(tp, bp);
1349+
}
1350+
qlip->qli_dirty = true;
1351+
spin_unlock(&qlip->qli_lock);
1352+
1353+
return 0;
1354+
}
1355+
1356+
/*
1357+
* Get a new reference the dquot buffer attached to this dquot for a dqflush
1358+
* operation.
1359+
*
1360+
* Returns 0 and a NULL bp if none was attached to the dquot; 0 and a locked
1361+
* bp; or -EAGAIN if the buffer could not be locked.
1362+
*/
1363+
int
1364+
xfs_dquot_use_attached_buf(
1365+
struct xfs_dquot *dqp,
1366+
struct xfs_buf **bpp)
1367+
{
1368+
struct xfs_buf *bp = dqp->q_logitem.qli_item.li_buf;
1369+
1370+
/*
1371+
* A NULL buffer can happen if the dquot dirty flag was set but the
1372+
* filesystem shut down before transaction commit happened. In that
1373+
* case we're not going to flush anyway.
1374+
*/
1375+
if (!bp) {
1376+
ASSERT(xfs_is_shutdown(dqp->q_mount));
1377+
1378+
*bpp = NULL;
1379+
return 0;
1380+
}
1381+
1382+
if (!xfs_buf_trylock(bp))
1383+
return -EAGAIN;
1384+
1385+
xfs_buf_hold(bp);
1386+
*bpp = bp;
1387+
return 0;
1388+
}
1389+
12781390
/*
12791391
* Write a modified dquot to disk.
12801392
* The dquot must be locked and the flush lock too taken by caller.
@@ -1289,7 +1401,8 @@ xfs_qm_dqflush(
12891401
struct xfs_buf *bp)
12901402
{
12911403
struct xfs_mount *mp = dqp->q_mount;
1292-
struct xfs_log_item *lip = &dqp->q_logitem.qli_item;
1404+
struct xfs_dq_logitem *qlip = &dqp->q_logitem;
1405+
struct xfs_log_item *lip = &qlip->qli_item;
12931406
struct xfs_dqblk *dqblk;
12941407
xfs_failaddr_t fa;
12951408
int error;
@@ -1319,8 +1432,15 @@ xfs_qm_dqflush(
13191432
*/
13201433
dqp->q_flags &= ~XFS_DQFLAG_DIRTY;
13211434

1322-
xfs_trans_ail_copy_lsn(mp->m_ail, &dqp->q_logitem.qli_flush_lsn,
1323-
&lip->li_lsn);
1435+
/*
1436+
* We hold the dquot lock, so nobody can dirty it while we're
1437+
* scheduling the write out. Clear the dirty-since-flush flag.
1438+
*/
1439+
spin_lock(&qlip->qli_lock);
1440+
qlip->qli_dirty = false;
1441+
spin_unlock(&qlip->qli_lock);
1442+
1443+
xfs_trans_ail_copy_lsn(mp->m_ail, &qlip->qli_flush_lsn, &lip->li_lsn);
13241444

13251445
/*
13261446
* copy the lsn into the on-disk dquot now while we have the in memory

fs/xfs/xfs_dquot.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -215,7 +215,7 @@ void xfs_dquot_to_disk(struct xfs_disk_dquot *ddqp, struct xfs_dquot *dqp);
215215

216216
void xfs_qm_dqdestroy(struct xfs_dquot *dqp);
217217
int xfs_dquot_read_buf(struct xfs_trans *tp, struct xfs_dquot *dqp,
218-
struct xfs_buf **bpp);
218+
xfs_buf_flags_t flags, struct xfs_buf **bpp);
219219
int xfs_qm_dqflush(struct xfs_dquot *dqp, struct xfs_buf *bp);
220220
void xfs_qm_dqunpin_wait(struct xfs_dquot *dqp);
221221
void xfs_qm_adjust_dqtimers(struct xfs_dquot *d);
@@ -239,6 +239,10 @@ void xfs_dqlockn(struct xfs_dqtrx *q);
239239

240240
void xfs_dquot_set_prealloc_limits(struct xfs_dquot *);
241241

242+
int xfs_dquot_attach_buf(struct xfs_trans *tp, struct xfs_dquot *dqp);
243+
int xfs_dquot_use_attached_buf(struct xfs_dquot *dqp, struct xfs_buf **bpp);
244+
void xfs_dquot_detach_buf(struct xfs_dquot *dqp);
245+
242246
static inline struct xfs_dquot *xfs_qm_dqhold(struct xfs_dquot *dqp)
243247
{
244248
xfs_dqlock(dqp);

fs/xfs/xfs_dquot_item.c

Lines changed: 25 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -123,8 +123,9 @@ xfs_qm_dquot_logitem_push(
123123
__releases(&lip->li_ailp->ail_lock)
124124
__acquires(&lip->li_ailp->ail_lock)
125125
{
126-
struct xfs_dquot *dqp = DQUOT_ITEM(lip)->qli_dquot;
127-
struct xfs_buf *bp = lip->li_buf;
126+
struct xfs_dq_logitem *qlip = DQUOT_ITEM(lip);
127+
struct xfs_dquot *dqp = qlip->qli_dquot;
128+
struct xfs_buf *bp;
128129
uint rval = XFS_ITEM_SUCCESS;
129130
int error;
130131

@@ -155,11 +156,10 @@ xfs_qm_dquot_logitem_push(
155156

156157
spin_unlock(&lip->li_ailp->ail_lock);
157158

158-
error = xfs_dquot_read_buf(NULL, dqp, &bp);
159-
if (error) {
160-
if (error == -EAGAIN)
161-
rval = XFS_ITEM_LOCKED;
159+
error = xfs_dquot_use_attached_buf(dqp, &bp);
160+
if (error == -EAGAIN) {
162161
xfs_dqfunlock(dqp);
162+
rval = XFS_ITEM_LOCKED;
163163
goto out_relock_ail;
164164
}
165165

@@ -207,12 +207,10 @@ xfs_qm_dquot_logitem_committing(
207207
}
208208

209209
#ifdef DEBUG_EXPENSIVE
210-
static int
211-
xfs_qm_dquot_logitem_precommit(
212-
struct xfs_trans *tp,
213-
struct xfs_log_item *lip)
210+
static void
211+
xfs_qm_dquot_logitem_precommit_check(
212+
struct xfs_dquot *dqp)
214213
{
215-
struct xfs_dquot *dqp = DQUOT_ITEM(lip)->qli_dquot;
216214
struct xfs_mount *mp = dqp->q_mount;
217215
struct xfs_disk_dquot ddq = { };
218216
xfs_failaddr_t fa;
@@ -228,13 +226,24 @@ xfs_qm_dquot_logitem_precommit(
228226
xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
229227
ASSERT(fa == NULL);
230228
}
231-
232-
return 0;
233229
}
234230
#else
235-
# define xfs_qm_dquot_logitem_precommit NULL
231+
# define xfs_qm_dquot_logitem_precommit_check(...) ((void)0)
236232
#endif
237233

234+
static int
235+
xfs_qm_dquot_logitem_precommit(
236+
struct xfs_trans *tp,
237+
struct xfs_log_item *lip)
238+
{
239+
struct xfs_dq_logitem *qlip = DQUOT_ITEM(lip);
240+
struct xfs_dquot *dqp = qlip->qli_dquot;
241+
242+
xfs_qm_dquot_logitem_precommit_check(dqp);
243+
244+
return xfs_dquot_attach_buf(tp, dqp);
245+
}
246+
238247
static const struct xfs_item_ops xfs_dquot_item_ops = {
239248
.iop_size = xfs_qm_dquot_logitem_size,
240249
.iop_precommit = xfs_qm_dquot_logitem_precommit,
@@ -259,5 +268,7 @@ xfs_qm_dquot_logitem_init(
259268

260269
xfs_log_item_init(dqp->q_mount, &lp->qli_item, XFS_LI_DQUOT,
261270
&xfs_dquot_item_ops);
271+
spin_lock_init(&lp->qli_lock);
262272
lp->qli_dquot = dqp;
273+
lp->qli_dirty = false;
263274
}

fs/xfs/xfs_dquot_item.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,13 @@ struct xfs_dq_logitem {
1414
struct xfs_log_item qli_item; /* common portion */
1515
struct xfs_dquot *qli_dquot; /* dquot ptr */
1616
xfs_lsn_t qli_flush_lsn; /* lsn at last flush */
17+
18+
/*
19+
* We use this spinlock to coordinate access to the li_buf pointer in
20+
* the log item and the qli_dirty flag.
21+
*/
22+
spinlock_t qli_lock;
23+
bool qli_dirty; /* dirtied since last flush? */
1724
};
1825

1926
void xfs_qm_dquot_logitem_init(struct xfs_dquot *dqp);

fs/xfs/xfs_qm.c

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -148,7 +148,7 @@ xfs_qm_dqpurge(
148148
* We don't care about getting disk errors here. We need
149149
* to purge this dquot anyway, so we go ahead regardless.
150150
*/
151-
error = xfs_dquot_read_buf(NULL, dqp, &bp);
151+
error = xfs_dquot_read_buf(NULL, dqp, XBF_TRYLOCK, &bp);
152152
if (error == -EAGAIN) {
153153
xfs_dqfunlock(dqp);
154154
dqp->q_flags &= ~XFS_DQFLAG_FREEING;
@@ -168,6 +168,7 @@ xfs_qm_dqpurge(
168168
}
169169
xfs_dqflock(dqp);
170170
}
171+
xfs_dquot_detach_buf(dqp);
171172

172173
out_funlock:
173174
ASSERT(atomic_read(&dqp->q_pincount) == 0);
@@ -505,7 +506,7 @@ xfs_qm_dquot_isolate(
505506
/* we have to drop the LRU lock to flush the dquot */
506507
spin_unlock(&lru->lock);
507508

508-
error = xfs_dquot_read_buf(NULL, dqp, &bp);
509+
error = xfs_dquot_read_buf(NULL, dqp, XBF_TRYLOCK, &bp);
509510
if (error) {
510511
xfs_dqfunlock(dqp);
511512
goto out_unlock_dirty;
@@ -523,6 +524,8 @@ xfs_qm_dquot_isolate(
523524
xfs_buf_relse(bp);
524525
goto out_unlock_dirty;
525526
}
527+
528+
xfs_dquot_detach_buf(dqp);
526529
xfs_dqfunlock(dqp);
527530

528531
/*
@@ -1510,7 +1513,7 @@ xfs_qm_flush_one(
15101513
goto out_unlock;
15111514
}
15121515

1513-
error = xfs_dquot_read_buf(NULL, dqp, &bp);
1516+
error = xfs_dquot_read_buf(NULL, dqp, XBF_TRYLOCK, &bp);
15141517
if (error)
15151518
goto out_unlock;
15161519

fs/xfs/xfs_trans_ail.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -360,7 +360,7 @@ xfsaild_resubmit_item(
360360

361361
/* protected by ail_lock */
362362
list_for_each_entry(lip, &bp->b_li_list, li_bio_list) {
363-
if (bp->b_flags & _XBF_INODES)
363+
if (bp->b_flags & (_XBF_INODES | _XBF_DQUOTS))
364364
clear_bit(XFS_LI_FAILED, &lip->li_flags);
365365
else
366366
xfs_clear_li_failed(lip);

0 commit comments

Comments
 (0)