Skip to content

Commit 75b9c72

Browse files
committed
Merge tag 'xfs-5.13-fixes-3' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux
Pull xfs fixes from Darrick Wong: "This week's pile mitigates some decades-old problems in how extent size hints interact with realtime volumes, fixes some failures in online shrink, and fixes a problem where directory and symlink shrinking on extremely fragmented filesystems could fail. The most user-notable change here is to point users at our (new) IRC channel on OFTC. Freedom isn't free, it costs folks like you and me; and if you don't kowtow, they'll expel everyone and take over your channel. (Ok, ok, that didn't fit the song lyrics...) Summary: - Fix a bug where unmapping operations end earlier than expected, which can cause chaos on multi-block directory and symlink shrink operations. - Fix an erroneous assert that can trigger if we try to transition a bmap structure from btree format to extents format with zero extents. This was exposed by xfs/538" * tag 'xfs-5.13-fixes-3' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux: xfs: bunmapi has unnecessary AG lock ordering issues xfs: btree format inode forks can have zero extents xfs: add new IRC channel to MAINTAINERS xfs: validate extsz hints against rt extent size when rtinherit is set xfs: standardize extent size hint validation xfs: check free AG space when making per-AG reservations
2 parents df8c66c + 0fe0bbe commit 75b9c72

File tree

8 files changed

+140
-86
lines changed

8 files changed

+140
-86
lines changed

MAINTAINERS

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20014,6 +20014,7 @@ F: arch/x86/xen/*swiotlb*
2001420014
F: drivers/xen/*swiotlb*
2001520015

2001620016
XFS FILESYSTEM
20017+
C: irc://irc.oftc.net/xfs
2001720018
M: Darrick J. Wong <[email protected]>
2001820019
2001920020

fs/xfs/libxfs/xfs_ag_resv.c

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -325,10 +325,22 @@ xfs_ag_resv_init(
325325
error2 = xfs_alloc_pagf_init(mp, tp, pag->pag_agno, 0);
326326
if (error2)
327327
return error2;
328-
ASSERT(xfs_perag_resv(pag, XFS_AG_RESV_METADATA)->ar_reserved +
329-
xfs_perag_resv(pag, XFS_AG_RESV_RMAPBT)->ar_reserved <=
330-
pag->pagf_freeblks + pag->pagf_flcount);
328+
329+
/*
330+
* If there isn't enough space in the AG to satisfy the
331+
* reservation, let the caller know that there wasn't enough
332+
* space. Callers are responsible for deciding what to do
333+
* next, since (in theory) we can stumble along with
334+
* insufficient reservation if data blocks are being freed to
335+
* replenish the AG's free space.
336+
*/
337+
if (!error &&
338+
xfs_perag_resv(pag, XFS_AG_RESV_METADATA)->ar_reserved +
339+
xfs_perag_resv(pag, XFS_AG_RESV_RMAPBT)->ar_reserved >
340+
pag->pagf_freeblks + pag->pagf_flcount)
341+
error = -ENOSPC;
331342
}
343+
332344
return error;
333345
}
334346

fs/xfs/libxfs/xfs_bmap.c

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -605,7 +605,6 @@ xfs_bmap_btree_to_extents(
605605

606606
ASSERT(cur);
607607
ASSERT(whichfork != XFS_COW_FORK);
608-
ASSERT(!xfs_need_iread_extents(ifp));
609608
ASSERT(ifp->if_format == XFS_DINODE_FMT_BTREE);
610609
ASSERT(be16_to_cpu(rblock->bb_level) == 1);
611610
ASSERT(be16_to_cpu(rblock->bb_numrecs) == 1);
@@ -5350,7 +5349,6 @@ __xfs_bunmapi(
53505349
xfs_fsblock_t sum;
53515350
xfs_filblks_t len = *rlen; /* length to unmap in file */
53525351
xfs_fileoff_t max_len;
5353-
xfs_agnumber_t prev_agno = NULLAGNUMBER, agno;
53545352
xfs_fileoff_t end;
53555353
struct xfs_iext_cursor icur;
53565354
bool done = false;
@@ -5442,16 +5440,6 @@ __xfs_bunmapi(
54425440
del = got;
54435441
wasdel = isnullstartblock(del.br_startblock);
54445442

5445-
/*
5446-
* Make sure we don't touch multiple AGF headers out of order
5447-
* in a single transaction, as that could cause AB-BA deadlocks.
5448-
*/
5449-
if (!wasdel && !isrt) {
5450-
agno = XFS_FSB_TO_AGNO(mp, del.br_startblock);
5451-
if (prev_agno != NULLAGNUMBER && prev_agno > agno)
5452-
break;
5453-
prev_agno = agno;
5454-
}
54555443
if (got.br_startoff < start) {
54565444
del.br_startoff = start;
54575445
del.br_blockcount -= start - got.br_startoff;

fs/xfs/libxfs/xfs_inode_buf.c

Lines changed: 42 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -559,8 +559,17 @@ xfs_dinode_calc_crc(
559559
/*
560560
* Validate di_extsize hint.
561561
*
562-
* The rules are documented at xfs_ioctl_setattr_check_extsize().
563-
* These functions must be kept in sync with each other.
562+
* 1. Extent size hint is only valid for directories and regular files.
563+
* 2. FS_XFLAG_EXTSIZE is only valid for regular files.
564+
* 3. FS_XFLAG_EXTSZINHERIT is only valid for directories.
565+
* 4. Hint cannot be larger than MAXEXTLEN.
566+
* 5. Can be changed on directories at any time.
567+
* 6. Hint value of 0 turns off hints, clears inode flags.
568+
* 7. Extent size must be a multiple of the appropriate block size.
569+
* For realtime files, this is the rt extent size.
570+
* 8. For non-realtime files, the extent size hint must be limited
571+
* to half the AG size to avoid alignment extending the extent beyond the
572+
* limits of the AG.
564573
*/
565574
xfs_failaddr_t
566575
xfs_inode_validate_extsize(
@@ -580,6 +589,28 @@ xfs_inode_validate_extsize(
580589
inherit_flag = (flags & XFS_DIFLAG_EXTSZINHERIT);
581590
extsize_bytes = XFS_FSB_TO_B(mp, extsize);
582591

592+
/*
593+
* This comment describes a historic gap in this verifier function.
594+
*
595+
* On older kernels, the extent size hint verifier doesn't check that
596+
* the extent size hint is an integer multiple of the realtime extent
597+
* size on a directory with both RTINHERIT and EXTSZINHERIT flags set.
598+
* The verifier has always enforced the alignment rule for regular
599+
* files with the REALTIME flag set.
600+
*
601+
* If a directory with a misaligned extent size hint is allowed to
602+
* propagate that hint into a new regular realtime file, the result
603+
* is that the inode cluster buffer verifier will trigger a corruption
604+
* shutdown the next time it is run.
605+
*
606+
* Unfortunately, there could be filesystems with these misconfigured
607+
* directories in the wild, so we cannot add a check to this verifier
608+
* at this time because that will result in a new source of directory
609+
* corruption errors when reading an existing filesystem. Instead, we
610+
* permit the misconfiguration to pass through the verifiers so that
611+
* callers of this function can correct and mitigate externally.
612+
*/
613+
583614
if (rt_flag)
584615
blocksize_bytes = mp->m_sb.sb_rextsize << mp->m_sb.sb_blocklog;
585616
else
@@ -616,8 +647,15 @@ xfs_inode_validate_extsize(
616647
/*
617648
* Validate di_cowextsize hint.
618649
*
619-
* The rules are documented at xfs_ioctl_setattr_check_cowextsize().
620-
* These functions must be kept in sync with each other.
650+
* 1. CoW extent size hint can only be set if reflink is enabled on the fs.
651+
* The inode does not have to have any shared blocks, but it must be a v3.
652+
* 2. FS_XFLAG_COWEXTSIZE is only valid for directories and regular files;
653+
* for a directory, the hint is propagated to new files.
654+
* 3. Can be changed on files & directories at any time.
655+
* 4. Hint value of 0 turns off hints, clears inode flags.
656+
* 5. Extent size must be a multiple of the appropriate block size.
657+
* 6. The extent size hint must be limited to half the AG size to avoid
658+
* alignment extending the extent beyond the limits of the AG.
621659
*/
622660
xfs_failaddr_t
623661
xfs_inode_validate_cowextsize(

fs/xfs/libxfs/xfs_trans_inode.c

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,23 @@ xfs_trans_log_inode(
142142
flags |= XFS_ILOG_CORE;
143143
}
144144

145+
/*
146+
* Inode verifiers on older kernels don't check that the extent size
147+
* hint is an integer multiple of the rt extent size on a directory
148+
* with both rtinherit and extszinherit flags set. If we're logging a
149+
* directory that is misconfigured in this way, clear the hint.
150+
*/
151+
if ((ip->i_diflags & XFS_DIFLAG_RTINHERIT) &&
152+
(ip->i_diflags & XFS_DIFLAG_EXTSZINHERIT) &&
153+
(ip->i_extsize % ip->i_mount->m_sb.sb_rextsize) > 0) {
154+
xfs_info_once(ip->i_mount,
155+
"Correcting misaligned extent size hint in inode 0x%llx.", ip->i_ino);
156+
ip->i_diflags &= ~(XFS_DIFLAG_EXTSIZE |
157+
XFS_DIFLAG_EXTSZINHERIT);
158+
ip->i_extsize = 0;
159+
flags |= XFS_ILOG_CORE;
160+
}
161+
145162
/*
146163
* Record the specific change for fdatasync optimisation. This allows
147164
* fdatasync to skip log forces for inodes that are only timestamp

fs/xfs/xfs_inode.c

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -690,6 +690,7 @@ xfs_inode_inherit_flags(
690690
const struct xfs_inode *pip)
691691
{
692692
unsigned int di_flags = 0;
693+
xfs_failaddr_t failaddr;
693694
umode_t mode = VFS_I(ip)->i_mode;
694695

695696
if (S_ISDIR(mode)) {
@@ -729,6 +730,24 @@ xfs_inode_inherit_flags(
729730
di_flags |= XFS_DIFLAG_FILESTREAM;
730731

731732
ip->i_diflags |= di_flags;
733+
734+
/*
735+
* Inode verifiers on older kernels only check that the extent size
736+
* hint is an integer multiple of the rt extent size on realtime files.
737+
* They did not check the hint alignment on a directory with both
738+
* rtinherit and extszinherit flags set. If the misaligned hint is
739+
* propagated from a directory into a new realtime file, new file
740+
* allocations will fail due to math errors in the rt allocator and/or
741+
* trip the verifiers. Validate the hint settings in the new file so
742+
* that we don't let broken hints propagate.
743+
*/
744+
failaddr = xfs_inode_validate_extsize(ip->i_mount, ip->i_extsize,
745+
VFS_I(ip)->i_mode, ip->i_diflags);
746+
if (failaddr) {
747+
ip->i_diflags &= ~(XFS_DIFLAG_EXTSIZE |
748+
XFS_DIFLAG_EXTSZINHERIT);
749+
ip->i_extsize = 0;
750+
}
732751
}
733752

734753
/* Propagate di_flags2 from a parent inode to a child inode. */
@@ -737,12 +756,22 @@ xfs_inode_inherit_flags2(
737756
struct xfs_inode *ip,
738757
const struct xfs_inode *pip)
739758
{
759+
xfs_failaddr_t failaddr;
760+
740761
if (pip->i_diflags2 & XFS_DIFLAG2_COWEXTSIZE) {
741762
ip->i_diflags2 |= XFS_DIFLAG2_COWEXTSIZE;
742763
ip->i_cowextsize = pip->i_cowextsize;
743764
}
744765
if (pip->i_diflags2 & XFS_DIFLAG2_DAX)
745766
ip->i_diflags2 |= XFS_DIFLAG2_DAX;
767+
768+
/* Don't let invalid cowextsize hints propagate. */
769+
failaddr = xfs_inode_validate_cowextsize(ip->i_mount, ip->i_cowextsize,
770+
VFS_I(ip)->i_mode, ip->i_diflags, ip->i_diflags2);
771+
if (failaddr) {
772+
ip->i_diflags2 &= ~XFS_DIFLAG2_COWEXTSIZE;
773+
ip->i_cowextsize = 0;
774+
}
746775
}
747776

748777
/*

fs/xfs/xfs_ioctl.c

Lines changed: 34 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -1267,107 +1267,74 @@ xfs_ioctl_setattr_get_trans(
12671267
}
12681268

12691269
/*
1270-
* extent size hint validation is somewhat cumbersome. Rules are:
1271-
*
1272-
* 1. extent size hint is only valid for directories and regular files
1273-
* 2. FS_XFLAG_EXTSIZE is only valid for regular files
1274-
* 3. FS_XFLAG_EXTSZINHERIT is only valid for directories.
1275-
* 4. can only be changed on regular files if no extents are allocated
1276-
* 5. can be changed on directories at any time
1277-
* 6. extsize hint of 0 turns off hints, clears inode flags.
1278-
* 7. Extent size must be a multiple of the appropriate block size.
1279-
* 8. for non-realtime files, the extent size hint must be limited
1280-
* to half the AG size to avoid alignment extending the extent beyond the
1281-
* limits of the AG.
1282-
*
1283-
* Please keep this function in sync with xfs_scrub_inode_extsize.
1270+
* Validate a proposed extent size hint. For regular files, the hint can only
1271+
* be changed if no extents are allocated.
12841272
*/
12851273
static int
12861274
xfs_ioctl_setattr_check_extsize(
12871275
struct xfs_inode *ip,
12881276
struct fileattr *fa)
12891277
{
12901278
struct xfs_mount *mp = ip->i_mount;
1291-
xfs_extlen_t size;
1292-
xfs_fsblock_t extsize_fsb;
1279+
xfs_failaddr_t failaddr;
1280+
uint16_t new_diflags;
12931281

12941282
if (!fa->fsx_valid)
12951283
return 0;
12961284

12971285
if (S_ISREG(VFS_I(ip)->i_mode) && ip->i_df.if_nextents &&
1298-
((ip->i_extsize << mp->m_sb.sb_blocklog) != fa->fsx_extsize))
1286+
XFS_FSB_TO_B(mp, ip->i_extsize) != fa->fsx_extsize)
12991287
return -EINVAL;
13001288

1301-
if (fa->fsx_extsize == 0)
1302-
return 0;
1303-
1304-
extsize_fsb = XFS_B_TO_FSB(mp, fa->fsx_extsize);
1305-
if (extsize_fsb > MAXEXTLEN)
1289+
if (fa->fsx_extsize & mp->m_blockmask)
13061290
return -EINVAL;
13071291

1308-
if (XFS_IS_REALTIME_INODE(ip) ||
1309-
(fa->fsx_xflags & FS_XFLAG_REALTIME)) {
1310-
size = mp->m_sb.sb_rextsize << mp->m_sb.sb_blocklog;
1311-
} else {
1312-
size = mp->m_sb.sb_blocksize;
1313-
if (extsize_fsb > mp->m_sb.sb_agblocks / 2)
1292+
new_diflags = xfs_flags2diflags(ip, fa->fsx_xflags);
1293+
1294+
/*
1295+
* Inode verifiers on older kernels don't check that the extent size
1296+
* hint is an integer multiple of the rt extent size on a directory
1297+
* with both rtinherit and extszinherit flags set. Don't let sysadmins
1298+
* misconfigure directories.
1299+
*/
1300+
if ((new_diflags & XFS_DIFLAG_RTINHERIT) &&
1301+
(new_diflags & XFS_DIFLAG_EXTSZINHERIT)) {
1302+
unsigned int rtextsize_bytes;
1303+
1304+
rtextsize_bytes = XFS_FSB_TO_B(mp, mp->m_sb.sb_rextsize);
1305+
if (fa->fsx_extsize % rtextsize_bytes)
13141306
return -EINVAL;
13151307
}
13161308

1317-
if (fa->fsx_extsize % size)
1318-
return -EINVAL;
1319-
1320-
return 0;
1309+
failaddr = xfs_inode_validate_extsize(ip->i_mount,
1310+
XFS_B_TO_FSB(mp, fa->fsx_extsize),
1311+
VFS_I(ip)->i_mode, new_diflags);
1312+
return failaddr != NULL ? -EINVAL : 0;
13211313
}
13221314

1323-
/*
1324-
* CoW extent size hint validation rules are:
1325-
*
1326-
* 1. CoW extent size hint can only be set if reflink is enabled on the fs.
1327-
* The inode does not have to have any shared blocks, but it must be a v3.
1328-
* 2. FS_XFLAG_COWEXTSIZE is only valid for directories and regular files;
1329-
* for a directory, the hint is propagated to new files.
1330-
* 3. Can be changed on files & directories at any time.
1331-
* 4. CoW extsize hint of 0 turns off hints, clears inode flags.
1332-
* 5. Extent size must be a multiple of the appropriate block size.
1333-
* 6. The extent size hint must be limited to half the AG size to avoid
1334-
* alignment extending the extent beyond the limits of the AG.
1335-
*
1336-
* Please keep this function in sync with xfs_scrub_inode_cowextsize.
1337-
*/
13381315
static int
13391316
xfs_ioctl_setattr_check_cowextsize(
13401317
struct xfs_inode *ip,
13411318
struct fileattr *fa)
13421319
{
13431320
struct xfs_mount *mp = ip->i_mount;
1344-
xfs_extlen_t size;
1345-
xfs_fsblock_t cowextsize_fsb;
1321+
xfs_failaddr_t failaddr;
1322+
uint64_t new_diflags2;
1323+
uint16_t new_diflags;
13461324

13471325
if (!fa->fsx_valid)
13481326
return 0;
13491327

1350-
if (!(fa->fsx_xflags & FS_XFLAG_COWEXTSIZE))
1351-
return 0;
1352-
1353-
if (!xfs_sb_version_hasreflink(&ip->i_mount->m_sb))
1328+
if (fa->fsx_cowextsize & mp->m_blockmask)
13541329
return -EINVAL;
13551330

1356-
if (fa->fsx_cowextsize == 0)
1357-
return 0;
1331+
new_diflags = xfs_flags2diflags(ip, fa->fsx_xflags);
1332+
new_diflags2 = xfs_flags2diflags2(ip, fa->fsx_xflags);
13581333

1359-
cowextsize_fsb = XFS_B_TO_FSB(mp, fa->fsx_cowextsize);
1360-
if (cowextsize_fsb > MAXEXTLEN)
1361-
return -EINVAL;
1362-
1363-
size = mp->m_sb.sb_blocksize;
1364-
if (cowextsize_fsb > mp->m_sb.sb_agblocks / 2)
1365-
return -EINVAL;
1366-
1367-
if (fa->fsx_cowextsize % size)
1368-
return -EINVAL;
1369-
1370-
return 0;
1334+
failaddr = xfs_inode_validate_cowextsize(ip->i_mount,
1335+
XFS_B_TO_FSB(mp, fa->fsx_cowextsize),
1336+
VFS_I(ip)->i_mode, new_diflags, new_diflags2);
1337+
return failaddr != NULL ? -EINVAL : 0;
13711338
}
13721339

13731340
static int

fs/xfs/xfs_message.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,8 @@ do { \
7373
xfs_printk_once(xfs_warn, dev, fmt, ##__VA_ARGS__)
7474
#define xfs_notice_once(dev, fmt, ...) \
7575
xfs_printk_once(xfs_notice, dev, fmt, ##__VA_ARGS__)
76+
#define xfs_info_once(dev, fmt, ...) \
77+
xfs_printk_once(xfs_info, dev, fmt, ##__VA_ARGS__)
7678

7779
void assfail(struct xfs_mount *mp, char *expr, char *f, int l);
7880
void asswarn(struct xfs_mount *mp, char *expr, char *f, int l);

0 commit comments

Comments
 (0)