Skip to content

Commit aa66032

Browse files
committed
Merge tag 'xfs-5.14-fixes-2' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux
Pull xfs fixes from Darrick Wong: "This contains a bunch of bug fixes in XFS. Dave and I have been busy the last couple of weeks to find and fix as many log recovery bugs as we can find; here are the results so far. Go fstests -g recoveryloop! ;) - Fix a number of coordination bugs relating to cache flushes for metadata writeback, cache flushes for multi-buffer log writes, and FUA writes for single-buffer log writes - Fix a bug with incorrect replay of attr3 blocks - Fix unnecessary stalls when flushing logs to disk - Fix spoofing problems when recovering realtime bitmap blocks" * tag 'xfs-5.14-fixes-2' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux: xfs: prevent spoofing of rtbitmap blocks when recovering buffers xfs: limit iclog tail updates xfs: need to see iclog flags in tracing xfs: Enforce attr3 buffer recovery order xfs: logging the on disk inode LSN can make it go backwards xfs: avoid unnecessary waits in xfs_log_force_lsn() xfs: log forces imply data device cache flushes xfs: factor out forced iclog flushes xfs: fix ordering violation between cache flushes and tail updates xfs: fold __xlog_state_release_iclog into xlog_state_release_iclog xfs: external logs need to flush data device xfs: flush data dev on external log write
2 parents f3438b4 + 81a448d commit aa66032

File tree

7 files changed

+244
-106
lines changed

7 files changed

+244
-106
lines changed

fs/xfs/libxfs/xfs_log_format.h

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -411,7 +411,16 @@ struct xfs_log_dinode {
411411
/* start of the extended dinode, writable fields */
412412
uint32_t di_crc; /* CRC of the inode */
413413
uint64_t di_changecount; /* number of attribute changes */
414-
xfs_lsn_t di_lsn; /* flush sequence */
414+
415+
/*
416+
* The LSN we write to this field during formatting is not a reflection
417+
* of the current on-disk LSN. It should never be used for recovery
418+
* sequencing, nor should it be recovered into the on-disk inode at all.
419+
* See xlog_recover_inode_commit_pass2() and xfs_log_dinode_to_disk()
420+
* for details.
421+
*/
422+
xfs_lsn_t di_lsn;
423+
415424
uint64_t di_flags2; /* more random flags */
416425
uint32_t di_cowextsize; /* basic cow extent size for file */
417426
uint8_t di_pad2[12]; /* more padding for future expansion */

fs/xfs/xfs_buf_item_recover.c

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -698,19 +698,29 @@ xlog_recover_do_inode_buffer(
698698
static xfs_lsn_t
699699
xlog_recover_get_buf_lsn(
700700
struct xfs_mount *mp,
701-
struct xfs_buf *bp)
701+
struct xfs_buf *bp,
702+
struct xfs_buf_log_format *buf_f)
702703
{
703704
uint32_t magic32;
704705
uint16_t magic16;
705706
uint16_t magicda;
706707
void *blk = bp->b_addr;
707708
uuid_t *uuid;
708709
xfs_lsn_t lsn = -1;
710+
uint16_t blft;
709711

710712
/* v4 filesystems always recover immediately */
711713
if (!xfs_sb_version_hascrc(&mp->m_sb))
712714
goto recover_immediately;
713715

716+
/*
717+
* realtime bitmap and summary file blocks do not have magic numbers or
718+
* UUIDs, so we must recover them immediately.
719+
*/
720+
blft = xfs_blft_from_flags(buf_f);
721+
if (blft == XFS_BLFT_RTBITMAP_BUF || blft == XFS_BLFT_RTSUMMARY_BUF)
722+
goto recover_immediately;
723+
714724
magic32 = be32_to_cpu(*(__be32 *)blk);
715725
switch (magic32) {
716726
case XFS_ABTB_CRC_MAGIC:
@@ -796,6 +806,7 @@ xlog_recover_get_buf_lsn(
796806
switch (magicda) {
797807
case XFS_DIR3_LEAF1_MAGIC:
798808
case XFS_DIR3_LEAFN_MAGIC:
809+
case XFS_ATTR3_LEAF_MAGIC:
799810
case XFS_DA3_NODE_MAGIC:
800811
lsn = be64_to_cpu(((struct xfs_da3_blkinfo *)blk)->lsn);
801812
uuid = &((struct xfs_da3_blkinfo *)blk)->uuid;
@@ -919,7 +930,7 @@ xlog_recover_buf_commit_pass2(
919930
* the verifier will be reset to match whatever recover turns that
920931
* buffer into.
921932
*/
922-
lsn = xlog_recover_get_buf_lsn(mp, bp);
933+
lsn = xlog_recover_get_buf_lsn(mp, bp, buf_f);
923934
if (lsn && lsn != -1 && XFS_LSN_CMP(lsn, current_lsn) >= 0) {
924935
trace_xfs_log_recover_buf_skip(log, buf_f);
925936
xlog_recover_validate_buf_type(mp, bp, buf_f, NULLCOMMITLSN);

fs/xfs/xfs_inode_item_recover.c

Lines changed: 29 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -145,7 +145,8 @@ xfs_log_dinode_to_disk_ts(
145145
STATIC void
146146
xfs_log_dinode_to_disk(
147147
struct xfs_log_dinode *from,
148-
struct xfs_dinode *to)
148+
struct xfs_dinode *to,
149+
xfs_lsn_t lsn)
149150
{
150151
to->di_magic = cpu_to_be16(from->di_magic);
151152
to->di_mode = cpu_to_be16(from->di_mode);
@@ -182,7 +183,7 @@ xfs_log_dinode_to_disk(
182183
to->di_flags2 = cpu_to_be64(from->di_flags2);
183184
to->di_cowextsize = cpu_to_be32(from->di_cowextsize);
184185
to->di_ino = cpu_to_be64(from->di_ino);
185-
to->di_lsn = cpu_to_be64(from->di_lsn);
186+
to->di_lsn = cpu_to_be64(lsn);
186187
memcpy(to->di_pad2, from->di_pad2, sizeof(to->di_pad2));
187188
uuid_copy(&to->di_uuid, &from->di_uuid);
188189
to->di_flushiter = 0;
@@ -261,16 +262,25 @@ xlog_recover_inode_commit_pass2(
261262
}
262263

263264
/*
264-
* If the inode has an LSN in it, recover the inode only if it's less
265-
* than the lsn of the transaction we are replaying. Note: we still
266-
* need to replay an owner change even though the inode is more recent
267-
* than the transaction as there is no guarantee that all the btree
268-
* blocks are more recent than this transaction, too.
265+
* If the inode has an LSN in it, recover the inode only if the on-disk
266+
* inode's LSN is older than the lsn of the transaction we are
267+
* replaying. We can have multiple checkpoints with the same start LSN,
268+
* so the current LSN being equal to the on-disk LSN doesn't necessarily
269+
* mean that the on-disk inode is more recent than the change being
270+
* replayed.
271+
*
272+
* We must check the current_lsn against the on-disk inode
273+
* here because the we can't trust the log dinode to contain a valid LSN
274+
* (see comment below before replaying the log dinode for details).
275+
*
276+
* Note: we still need to replay an owner change even though the inode
277+
* is more recent than the transaction as there is no guarantee that all
278+
* the btree blocks are more recent than this transaction, too.
269279
*/
270280
if (dip->di_version >= 3) {
271281
xfs_lsn_t lsn = be64_to_cpu(dip->di_lsn);
272282

273-
if (lsn && lsn != -1 && XFS_LSN_CMP(lsn, current_lsn) >= 0) {
283+
if (lsn && lsn != -1 && XFS_LSN_CMP(lsn, current_lsn) > 0) {
274284
trace_xfs_log_recover_inode_skip(log, in_f);
275285
error = 0;
276286
goto out_owner_change;
@@ -368,8 +378,17 @@ xlog_recover_inode_commit_pass2(
368378
goto out_release;
369379
}
370380

371-
/* recover the log dinode inode into the on disk inode */
372-
xfs_log_dinode_to_disk(ldip, dip);
381+
/*
382+
* Recover the log dinode inode into the on disk inode.
383+
*
384+
* The LSN in the log dinode is garbage - it can be zero or reflect
385+
* stale in-memory runtime state that isn't coherent with the changes
386+
* logged in this transaction or the changes written to the on-disk
387+
* inode. Hence we write the current lSN into the inode because that
388+
* matches what xfs_iflush() would write inode the inode when flushing
389+
* the changes in this transaction.
390+
*/
391+
xfs_log_dinode_to_disk(ldip, dip, current_lsn);
373392

374393
fields = in_f->ilf_fields;
375394
if (fields & XFS_ILOG_DEV)

0 commit comments

Comments
 (0)