Skip to content

Commit 03dc748

Browse files
committed
Merge tag 'xfs-5.12-merge-6' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux
Pull more xfs updates from Darrick Wong:
"The most notable fixes here prevent premature reuse of freed metadata blocks and add the ability to detect accidental nested transactions, which are not allowed here.
- Restore a disused sysctl control knob that was inadvertently dropped during the merge window to avoid fstests regressions.
- Don't speculatively release freed blocks from the busy list until we're actually allocating them, which fixes a rare log recovery regression.
- Don't nest transactions when scanning for free space.
- Add an idiot^Wmaintainer light to detect nested transactions. ;)"
* tag 'xfs-5.12-merge-6' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux:
  xfs: use current->journal_info for detecting transaction recursion
  xfs: don't nest transactions when scanning for eofblocks
  xfs: don't reuse busy extents on extent trim
  xfs: restore speculative_cow_prealloc_lifetime sysctl
2 parents 3ab6608 + 756b1c3 commit 03dc748

File tree

8 files changed

+94
-70
lines changed

8 files changed

+94
-70
lines changed

Documentation/admin-guide/xfs.rst

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -284,6 +284,9 @@ The following sysctls are available for the XFS filesystem:
284284
removes unused preallocation from clean inodes and releases
285285
the unused space back to the free pool.
286286

287+
fs.xfs.speculative_cow_prealloc_lifetime
288+
This is an alias for speculative_prealloc_lifetime.
289+
287290
fs.xfs.error_level (Min: 0 Default: 3 Max: 11)
288291
A volume knob for error reporting when internal errors occur.
289292
This will generate detailed messages & backtraces for filesystem
@@ -356,12 +359,13 @@ The following sysctls are available for the XFS filesystem:
356359
Deprecated Sysctls
357360
==================
358361

359-
=========================== ================
360-
Name Removal Schedule
361-
=========================== ================
362-
fs.xfs.irix_sgid_inherit September 2025
363-
fs.xfs.irix_symlink_mode September 2025
364-
=========================== ================
362+
=========================================== ================
363+
Name Removal Schedule
364+
=========================================== ================
365+
fs.xfs.irix_sgid_inherit September 2025
366+
fs.xfs.irix_symlink_mode September 2025
367+
fs.xfs.speculative_cow_prealloc_lifetime September 2025
368+
=========================================== ================
365369

366370

367371
Removed Sysctls

fs/iomap/buffered-io.c

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1458,13 +1458,6 @@ iomap_do_writepage(struct page *page, struct writeback_control *wbc, void *data)
14581458
PF_MEMALLOC))
14591459
goto redirty;
14601460

1461-
/*
1462-
* Given that we do not allow direct reclaim to call us, we should
1463-
* never be called in a recursive filesystem reclaim context.
1464-
*/
1465-
if (WARN_ON_ONCE(current->flags & PF_MEMALLOC_NOFS))
1466-
goto redirty;
1467-
14681461
/*
14691462
* Is this page beyond the end of the file?
14701463
*

fs/xfs/libxfs/xfs_btree.c

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2805,7 +2805,7 @@ xfs_btree_split_worker(
28052805
struct xfs_btree_split_args *args = container_of(work,
28062806
struct xfs_btree_split_args, work);
28072807
unsigned long pflags;
2808-
unsigned long new_pflags = PF_MEMALLOC_NOFS;
2808+
unsigned long new_pflags = 0;
28092809

28102810
/*
28112811
* we are in a transaction context here, but may also be doing work
@@ -2817,12 +2817,20 @@ xfs_btree_split_worker(
28172817
new_pflags |= PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD;
28182818

28192819
current_set_flags_nested(&pflags, new_pflags);
2820+
xfs_trans_set_context(args->cur->bc_tp);
28202821

28212822
args->result = __xfs_btree_split(args->cur, args->level, args->ptrp,
28222823
args->key, args->curp, args->stat);
2823-
complete(args->done);
28242824

2825+
xfs_trans_clear_context(args->cur->bc_tp);
28252826
current_restore_flags_nested(&pflags, new_pflags);
2827+
2828+
/*
2829+
* Do not access args after complete() has run here. We don't own args
2830+
* and the owner may run and free args before we return here.
2831+
*/
2832+
complete(args->done);
2833+
28262834
}
28272835

28282836
/*

fs/xfs/xfs_aops.c

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ xfs_setfilesize_trans_alloc(
6262
* We hand off the transaction to the completion thread now, so
6363
* clear the flag here.
6464
*/
65-
current_restore_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS);
65+
xfs_trans_clear_context(tp);
6666
return 0;
6767
}
6868

@@ -125,7 +125,7 @@ xfs_setfilesize_ioend(
125125
* thus we need to mark ourselves as being in a transaction manually.
126126
* Similarly for freeze protection.
127127
*/
128-
current_set_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS);
128+
xfs_trans_set_context(tp);
129129
__sb_writers_acquired(VFS_I(ip)->i_sb, SB_FREEZE_FS);
130130

131131
/* we abort the update if there was an IO error */
@@ -568,6 +568,12 @@ xfs_vm_writepage(
568568
{
569569
struct xfs_writepage_ctx wpc = { };
570570

571+
if (WARN_ON_ONCE(current->journal_info)) {
572+
redirty_page_for_writepage(wbc, page);
573+
unlock_page(page);
574+
return 0;
575+
}
576+
571577
return iomap_writepage(page, wbc, &wpc.ctx, &xfs_writeback_ops);
572578
}
573579

@@ -578,6 +584,13 @@ xfs_vm_writepages(
578584
{
579585
struct xfs_writepage_ctx wpc = { };
580586

587+
/*
588+
* Writing back data in a transaction context can result in recursive
589+
* transactions. This is bad, so issue a warning and get out of here.
590+
*/
591+
if (WARN_ON_ONCE(current->journal_info))
592+
return 0;
593+
581594
xfs_iflags_clear(XFS_I(mapping->host), XFS_ITRUNCATED);
582595
return iomap_writepages(mapping, wbc, &wpc.ctx, &xfs_writeback_ops);
583596
}

fs/xfs/xfs_extent_busy.c

Lines changed: 0 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -344,7 +344,6 @@ xfs_extent_busy_trim(
344344
ASSERT(*len > 0);
345345

346346
spin_lock(&args->pag->pagb_lock);
347-
restart:
348347
fbno = *bno;
349348
flen = *len;
350349
rbp = args->pag->pagb_tree.rb_node;
@@ -363,19 +362,6 @@ xfs_extent_busy_trim(
363362
continue;
364363
}
365364

366-
/*
367-
* If this is a metadata allocation, try to reuse the busy
368-
* extent instead of trimming the allocation.
369-
*/
370-
if (!(args->datatype & XFS_ALLOC_USERDATA) &&
371-
!(busyp->flags & XFS_EXTENT_BUSY_DISCARDED)) {
372-
if (!xfs_extent_busy_update_extent(args->mp, args->pag,
373-
busyp, fbno, flen,
374-
false))
375-
goto restart;
376-
continue;
377-
}
378-
379365
if (bbno <= fbno) {
380366
/* start overlap */
381367

fs/xfs/xfs_sysctl.c

Lines changed: 14 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -51,32 +51,16 @@ xfs_panic_mask_proc_handler(
5151
#endif /* CONFIG_PROC_FS */
5252

5353
STATIC int
54-
xfs_deprecate_irix_sgid_inherit_proc_handler(
54+
xfs_deprecated_dointvec_minmax(
5555
struct ctl_table *ctl,
5656
int write,
5757
void *buffer,
5858
size_t *lenp,
5959
loff_t *ppos)
6060
{
6161
if (write) {
62-
printk_once(KERN_WARNING
63-
"XFS: " "%s sysctl option is deprecated.\n",
64-
ctl->procname);
65-
}
66-
return proc_dointvec_minmax(ctl, write, buffer, lenp, ppos);
67-
}
68-
69-
STATIC int
70-
xfs_deprecate_irix_symlink_mode_proc_handler(
71-
struct ctl_table *ctl,
72-
int write,
73-
void *buffer,
74-
size_t *lenp,
75-
loff_t *ppos)
76-
{
77-
if (write) {
78-
printk_once(KERN_WARNING
79-
"XFS: " "%s sysctl option is deprecated.\n",
62+
printk_ratelimited(KERN_WARNING
63+
"XFS: %s sysctl option is deprecated.\n",
8064
ctl->procname);
8165
}
8266
return proc_dointvec_minmax(ctl, write, buffer, lenp, ppos);
@@ -88,7 +72,7 @@ static struct ctl_table xfs_table[] = {
8872
.data = &xfs_params.sgid_inherit.val,
8973
.maxlen = sizeof(int),
9074
.mode = 0644,
91-
.proc_handler = xfs_deprecate_irix_sgid_inherit_proc_handler,
75+
.proc_handler = xfs_deprecated_dointvec_minmax,
9276
.extra1 = &xfs_params.sgid_inherit.min,
9377
.extra2 = &xfs_params.sgid_inherit.max
9478
},
@@ -97,7 +81,7 @@ static struct ctl_table xfs_table[] = {
9781
.data = &xfs_params.symlink_mode.val,
9882
.maxlen = sizeof(int),
9983
.mode = 0644,
100-
.proc_handler = xfs_deprecate_irix_symlink_mode_proc_handler,
84+
.proc_handler = xfs_deprecated_dointvec_minmax,
10185
.extra1 = &xfs_params.symlink_mode.min,
10286
.extra2 = &xfs_params.symlink_mode.max
10387
},
@@ -201,6 +185,15 @@ static struct ctl_table xfs_table[] = {
201185
.extra1 = &xfs_params.blockgc_timer.min,
202186
.extra2 = &xfs_params.blockgc_timer.max,
203187
},
188+
{
189+
.procname = "speculative_cow_prealloc_lifetime",
190+
.data = &xfs_params.blockgc_timer.val,
191+
.maxlen = sizeof(int),
192+
.mode = 0644,
193+
.proc_handler = xfs_deprecated_dointvec_minmax,
194+
.extra1 = &xfs_params.blockgc_timer.min,
195+
.extra2 = &xfs_params.blockgc_timer.max,
196+
},
204197
/* please keep this the last entry */
205198
#ifdef CONFIG_PROC_FS
206199
{

fs/xfs/xfs_trans.c

Lines changed: 15 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@ xfs_trans_free(
7272
xfs_extent_busy_clear(tp->t_mountp, &tp->t_busy, false);
7373

7474
trace_xfs_trans_free(tp, _RET_IP_);
75+
xfs_trans_clear_context(tp);
7576
if (!(tp->t_flags & XFS_TRANS_NO_WRITECOUNT))
7677
sb_end_intwrite(tp->t_mountp->m_super);
7778
xfs_trans_free_dqinfo(tp);
@@ -123,7 +124,8 @@ xfs_trans_dup(
123124

124125
ntp->t_rtx_res = tp->t_rtx_res - tp->t_rtx_res_used;
125126
tp->t_rtx_res = tp->t_rtx_res_used;
126-
ntp->t_pflags = tp->t_pflags;
127+
128+
xfs_trans_switch_context(tp, ntp);
127129

128130
/* move deferred ops over to the new tp */
129131
xfs_defer_move(ntp, tp);
@@ -157,20 +159,15 @@ xfs_trans_reserve(
157159
int error = 0;
158160
bool rsvd = (tp->t_flags & XFS_TRANS_RESERVE) != 0;
159161

160-
/* Mark this thread as being in a transaction */
161-
current_set_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS);
162-
163162
/*
164163
* Attempt to reserve the needed disk blocks by decrementing
165164
* the number needed from the number available. This will
166165
* fail if the count would go below zero.
167166
*/
168167
if (blocks > 0) {
169168
error = xfs_mod_fdblocks(mp, -((int64_t)blocks), rsvd);
170-
if (error != 0) {
171-
current_restore_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS);
169+
if (error != 0)
172170
return -ENOSPC;
173-
}
174171
tp->t_blk_res += blocks;
175172
}
176173

@@ -244,9 +241,6 @@ xfs_trans_reserve(
244241
xfs_mod_fdblocks(mp, (int64_t)blocks, rsvd);
245242
tp->t_blk_res = 0;
246243
}
247-
248-
current_restore_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS);
249-
250244
return error;
251245
}
252246

@@ -260,16 +254,19 @@ xfs_trans_alloc(
260254
struct xfs_trans **tpp)
261255
{
262256
struct xfs_trans *tp;
257+
bool want_retry = true;
263258
int error;
264259

265260
/*
266261
* Allocate the handle before we do our freeze accounting and setting up
267262
* GFP_NOFS allocation context so that we avoid lockdep false positives
268263
* by doing GFP_KERNEL allocations inside sb_start_intwrite().
269264
*/
265+
retry:
270266
tp = kmem_cache_zalloc(xfs_trans_zone, GFP_KERNEL | __GFP_NOFAIL);
271267
if (!(flags & XFS_TRANS_NO_WRITECOUNT))
272268
sb_start_intwrite(mp->m_super);
269+
xfs_trans_set_context(tp);
273270

274271
/*
275272
* Zero-reservation ("empty") transactions can't modify anything, so
@@ -289,16 +286,21 @@ xfs_trans_alloc(
289286
tp->t_firstblock = NULLFSBLOCK;
290287

291288
error = xfs_trans_reserve(tp, resp, blocks, rtextents);
292-
if (error == -ENOSPC) {
289+
if (error == -ENOSPC && want_retry) {
290+
xfs_trans_cancel(tp);
291+
293292
/*
294293
* We weren't able to reserve enough space for the transaction.
295294
* Flush the other speculative space allocations to free space.
296295
* Do not perform a synchronous scan because callers can hold
297296
* other locks.
298297
*/
299298
error = xfs_blockgc_free_space(mp, NULL);
300-
if (!error)
301-
error = xfs_trans_reserve(tp, resp, blocks, rtextents);
299+
if (error)
300+
return error;
301+
302+
want_retry = false;
303+
goto retry;
302304
}
303305
if (error) {
304306
xfs_trans_cancel(tp);
@@ -893,7 +895,6 @@ __xfs_trans_commit(
893895

894896
xfs_log_commit_cil(mp, tp, &commit_lsn, regrant);
895897

896-
current_restore_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS);
897898
xfs_trans_free(tp);
898899

899900
/*
@@ -925,7 +926,6 @@ __xfs_trans_commit(
925926
xfs_log_ticket_ungrant(mp->m_log, tp->t_ticket);
926927
tp->t_ticket = NULL;
927928
}
928-
current_restore_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS);
929929
xfs_trans_free_items(tp, !!error);
930930
xfs_trans_free(tp);
931931

@@ -985,9 +985,6 @@ xfs_trans_cancel(
985985
tp->t_ticket = NULL;
986986
}
987987

988-
/* mark this thread as no longer being in a transaction */
989-
current_restore_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS);
990-
991988
xfs_trans_free_items(tp, dirty);
992989
xfs_trans_free(tp);
993990
}

fs/xfs/xfs_trans.h

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -281,4 +281,34 @@ int xfs_trans_alloc_ichange(struct xfs_inode *ip, struct xfs_dquot *udqp,
281281
struct xfs_dquot *gdqp, struct xfs_dquot *pdqp, bool force,
282282
struct xfs_trans **tpp);
283283

284+
static inline void
285+
xfs_trans_set_context(
286+
struct xfs_trans *tp)
287+
{
288+
ASSERT(current->journal_info == NULL);
289+
tp->t_pflags = memalloc_nofs_save();
290+
current->journal_info = tp;
291+
}
292+
293+
static inline void
294+
xfs_trans_clear_context(
295+
struct xfs_trans *tp)
296+
{
297+
if (current->journal_info == tp) {
298+
memalloc_nofs_restore(tp->t_pflags);
299+
current->journal_info = NULL;
300+
}
301+
}
302+
303+
static inline void
304+
xfs_trans_switch_context(
305+
struct xfs_trans *old_tp,
306+
struct xfs_trans *new_tp)
307+
{
308+
ASSERT(current->journal_info == old_tp);
309+
new_tp->t_pflags = old_tp->t_pflags;
310+
old_tp->t_pflags = 0;
311+
current->journal_info = new_tp;
312+
}
313+
284314
#endif /* __XFS_TRANS_H__ */

0 commit comments

Comments
 (0)