Skip to content

Commit 3be20b6

Browse files
committed
Merge tag 'ext4-for-linus-5.8-rc1-2' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4
Pull more ext4 updates from Ted Ts'o: "This is the second round of ext4 commits for the 5.8 merge window [1]. It includes the per-inode DAX support, which was dependent on the DAX infrastructure which came in via the XFS tree, and a number of regression and bug fixes; most notably the "BUG: using smp_processor_id() in preemptible code in ext4_mb_new_blocks" reported by syzkaller" [1] The pull request actually came in 15 minutes after I had tagged the rc1 release. Tssk, tssk, late.. - Linus * tag 'ext4-for-linus-5.8-rc1-2' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: ext4, jbd2: ensure panic by fix a race between jbd2 abort and ext4 error handlers ext4: support xattr gnu.* namespace for the Hurd ext4: mballoc: Use this_cpu_read instead of this_cpu_ptr ext4: avoid utf8_strncasecmp() with unstable name ext4: stop overwrite the errcode in ext4_setup_super ext4: fix partial cluster initialization when splitting extent ext4: avoid race conditions when remounting with options that change dax Documentation/dax: Update DAX enablement for ext4 fs/ext4: Introduce DAX inode flag fs/ext4: Remove jflag variable fs/ext4: Make DAX mount option a tri-state fs/ext4: Only change S_DAX on inode load fs/ext4: Update ext4_should_use_dax() fs/ext4: Change EXT4_MOUNT_DAX to EXT4_MOUNT_DAX_ALWAYS fs/ext4: Disallow verity if inode is DAX fs/ext4: Narrow scope of DAX check in setflags
2 parents b3a9e3b + 7b97d86 commit 3be20b6

File tree

19 files changed

+290
-73
lines changed

19 files changed

+290
-73
lines changed

Documentation/filesystems/dax.txt

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,16 +25,16 @@ size when creating the filesystem.
2525
Currently 3 filesystems support DAX: ext2, ext4 and xfs. Enabling DAX on them
2626
is different.
2727

28-
Enabling DAX on ext4 and ext2
28+
Enabling DAX on ext2
2929
-----------------------------
3030

3131
When mounting the filesystem, use the "-o dax" option on the command line or
3232
add 'dax' to the options in /etc/fstab. This works to enable DAX on all files
3333
within the filesystem. It is equivalent to the '-o dax=always' behavior below.
3434

3535

36-
Enabling DAX on xfs
37-
-------------------
36+
Enabling DAX on xfs and ext4
37+
----------------------------
3838

3939
Summary
4040
-------

Documentation/filesystems/ext4/verity.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,3 +39,6 @@ is encrypted as well as the data itself.
3939

4040
Verity files cannot have blocks allocated past the end of the verity
4141
metadata.
42+
43+
Verity and DAX are not compatible and attempts to set both of these flags
44+
on a file will fail.

fs/ext4/Makefile

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,8 @@ ext4-y := balloc.o bitmap.o block_validity.o dir.o ext4_jbd2.o extents.o \
99
extents_status.o file.o fsmap.o fsync.o hash.o ialloc.o \
1010
indirect.o inline.o inode.o ioctl.o mballoc.o migrate.o \
1111
mmp.o move_extent.o namei.o page-io.o readpage.o resize.o \
12-
super.o symlink.o sysfs.o xattr.o xattr_trusted.o xattr_user.o
12+
super.o symlink.o sysfs.o xattr.o xattr_hurd.o xattr_trusted.o \
13+
xattr_user.o
1314

1415
ext4-$(CONFIG_EXT4_FS_POSIX_ACL) += acl.o
1516
ext4-$(CONFIG_EXT4_FS_SECURITY) += xattr_security.o

fs/ext4/dir.c

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -675,6 +675,7 @@ static int ext4_d_compare(const struct dentry *dentry, unsigned int len,
675675
struct qstr qstr = {.name = str, .len = len };
676676
const struct dentry *parent = READ_ONCE(dentry->d_parent);
677677
const struct inode *inode = READ_ONCE(parent->d_inode);
678+
char strbuf[DNAME_INLINE_LEN];
678679

679680
if (!inode || !IS_CASEFOLDED(inode) ||
680681
!EXT4_SB(inode->i_sb)->s_encoding) {
@@ -683,6 +684,21 @@ static int ext4_d_compare(const struct dentry *dentry, unsigned int len,
683684
return memcmp(str, name->name, len);
684685
}
685686

687+
/*
688+
* If the dentry name is stored in-line, then it may be concurrently
689+
* modified by a rename. If this happens, the VFS will eventually retry
690+
* the lookup, so it doesn't matter what ->d_compare() returns.
691+
* However, it's unsafe to call utf8_strncasecmp() with an unstable
692+
* string. Therefore, we have to copy the name into a temporary buffer.
693+
*/
694+
if (len <= DNAME_INLINE_LEN - 1) {
695+
memcpy(strbuf, str, len);
696+
strbuf[len] = 0;
697+
qstr.name = strbuf;
698+
/* prevent compiler from optimizing out the temporary buffer */
699+
barrier();
700+
}
701+
686702
return ext4_ci_compare(inode, name, &qstr, false);
687703
}
688704

fs/ext4/ext4.h

Lines changed: 20 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -426,28 +426,33 @@ struct flex_groups {
426426
#define EXT4_VERITY_FL 0x00100000 /* Verity protected inode */
427427
#define EXT4_EA_INODE_FL 0x00200000 /* Inode used for large EA */
428428
/* 0x00400000 was formerly EXT4_EOFBLOCKS_FL */
429+
430+
#define EXT4_DAX_FL 0x02000000 /* Inode is DAX */
431+
429432
#define EXT4_INLINE_DATA_FL 0x10000000 /* Inode has inline data. */
430433
#define EXT4_PROJINHERIT_FL 0x20000000 /* Create with parents projid */
431434
#define EXT4_CASEFOLD_FL 0x40000000 /* Casefolded directory */
432435
#define EXT4_RESERVED_FL 0x80000000 /* reserved for ext4 lib */
433436

434-
#define EXT4_FL_USER_VISIBLE 0x705BDFFF /* User visible flags */
435-
#define EXT4_FL_USER_MODIFIABLE 0x604BC0FF /* User modifiable flags */
437+
#define EXT4_FL_USER_VISIBLE 0x725BDFFF /* User visible flags */
438+
#define EXT4_FL_USER_MODIFIABLE 0x624BC0FF /* User modifiable flags */
436439

437440
/* Flags we can manipulate with through EXT4_IOC_FSSETXATTR */
438441
#define EXT4_FL_XFLAG_VISIBLE (EXT4_SYNC_FL | \
439442
EXT4_IMMUTABLE_FL | \
440443
EXT4_APPEND_FL | \
441444
EXT4_NODUMP_FL | \
442445
EXT4_NOATIME_FL | \
443-
EXT4_PROJINHERIT_FL)
446+
EXT4_PROJINHERIT_FL | \
447+
EXT4_DAX_FL)
444448

445449
/* Flags that should be inherited by new inodes from their parent. */
446450
#define EXT4_FL_INHERITED (EXT4_SECRM_FL | EXT4_UNRM_FL | EXT4_COMPR_FL |\
447451
EXT4_SYNC_FL | EXT4_NODUMP_FL | EXT4_NOATIME_FL |\
448452
EXT4_NOCOMPR_FL | EXT4_JOURNAL_DATA_FL |\
449453
EXT4_NOTAIL_FL | EXT4_DIRSYNC_FL |\
450-
EXT4_PROJINHERIT_FL | EXT4_CASEFOLD_FL)
454+
EXT4_PROJINHERIT_FL | EXT4_CASEFOLD_FL |\
455+
EXT4_DAX_FL)
451456

452457
/* Flags that are appropriate for regular files (all but dir-specific ones). */
453458
#define EXT4_REG_FLMASK (~(EXT4_DIRSYNC_FL | EXT4_TOPDIR_FL | EXT4_CASEFOLD_FL |\
@@ -459,6 +464,10 @@ struct flex_groups {
459464
/* The only flags that should be swapped */
460465
#define EXT4_FL_SHOULD_SWAP (EXT4_HUGE_FILE_FL | EXT4_EXTENTS_FL)
461466

467+
/* Flags which are mutually exclusive to DAX */
468+
#define EXT4_DAX_MUT_EXCL (EXT4_VERITY_FL | EXT4_ENCRYPT_FL |\
469+
EXT4_JOURNAL_DATA_FL)
470+
462471
/* Mask out flags that are inappropriate for the given type of inode. */
463472
static inline __u32 ext4_mask_flags(umode_t mode, __u32 flags)
464473
{
@@ -499,6 +508,7 @@ enum {
499508
EXT4_INODE_VERITY = 20, /* Verity protected inode */
500509
EXT4_INODE_EA_INODE = 21, /* Inode used for large EA */
501510
/* 22 was formerly EXT4_INODE_EOFBLOCKS */
511+
EXT4_INODE_DAX = 25, /* Inode is DAX */
502512
EXT4_INODE_INLINE_DATA = 28, /* Data in inode. */
503513
EXT4_INODE_PROJINHERIT = 29, /* Create with parents projid */
504514
EXT4_INODE_CASEFOLD = 30, /* Casefolded directory */
@@ -1135,9 +1145,9 @@ struct ext4_inode_info {
11351145
#define EXT4_MOUNT_MINIX_DF 0x00080 /* Mimics the Minix statfs */
11361146
#define EXT4_MOUNT_NOLOAD 0x00100 /* Don't use existing journal*/
11371147
#ifdef CONFIG_FS_DAX
1138-
#define EXT4_MOUNT_DAX 0x00200 /* Direct Access */
1148+
#define EXT4_MOUNT_DAX_ALWAYS 0x00200 /* Direct Access */
11391149
#else
1140-
#define EXT4_MOUNT_DAX 0
1150+
#define EXT4_MOUNT_DAX_ALWAYS 0
11411151
#endif
11421152
#define EXT4_MOUNT_DATA_FLAGS 0x00C00 /* Mode for data writes: */
11431153
#define EXT4_MOUNT_JOURNAL_DATA 0x00400 /* Write data to journal */
@@ -1180,6 +1190,8 @@ struct ext4_inode_info {
11801190
blocks */
11811191
#define EXT4_MOUNT2_HURD_COMPAT 0x00000004 /* Support HURD-castrated
11821192
file systems */
1193+
#define EXT4_MOUNT2_DAX_NEVER 0x00000008 /* Do not allow Direct Access */
1194+
#define EXT4_MOUNT2_DAX_INODE 0x00000010 /* For printing options only */
11831195

11841196
#define EXT4_MOUNT2_EXPLICIT_JOURNAL_CHECKSUM 0x00000008 /* User explicitly
11851197
specified journal checksum */
@@ -1992,6 +2004,7 @@ static inline bool ext4_has_incompat_features(struct super_block *sb)
19922004
*/
19932005
#define EXT4_FLAGS_RESIZING 0
19942006
#define EXT4_FLAGS_SHUTDOWN 1
2007+
#define EXT4_FLAGS_BDEV_IS_DAX 2
19952008

19962009
static inline int ext4_forced_shutdown(struct ext4_sb_info *sbi)
19972010
{
@@ -2705,7 +2718,7 @@ extern int ext4_can_truncate(struct inode *inode);
27052718
extern int ext4_truncate(struct inode *);
27062719
extern int ext4_break_layouts(struct inode *);
27072720
extern int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length);
2708-
extern void ext4_set_inode_flags(struct inode *);
2721+
extern void ext4_set_inode_flags(struct inode *, bool init);
27092722
extern int ext4_alloc_da_blocks(struct inode *inode);
27102723
extern void ext4_set_aops(struct inode *inode);
27112724
extern int ext4_writepage_trans_blocks(struct inode *);

fs/ext4/extents.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2844,7 +2844,7 @@ int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start,
28442844
* in use to avoid freeing it when removing blocks.
28452845
*/
28462846
if (sbi->s_cluster_ratio > 1) {
2847-
pblk = ext4_ext_pblock(ex) + end - ee_block + 2;
2847+
pblk = ext4_ext_pblock(ex) + end - ee_block + 1;
28482848
partial.pclu = EXT4_B2C(sbi, pblk);
28492849
partial.state = nofree;
28502850
}

fs/ext4/ialloc.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1116,7 +1116,7 @@ struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir,
11161116
ei->i_block_group = group;
11171117
ei->i_last_alloc_group = ~0;
11181118

1119-
ext4_set_inode_flags(inode);
1119+
ext4_set_inode_flags(inode, true);
11201120
if (IS_DIRSYNC(inode))
11211121
ext4_handle_sync(handle);
11221122
if (insert_inode_locked(inode) < 0) {

fs/ext4/inode.c

Lines changed: 20 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4403,9 +4403,11 @@ int ext4_get_inode_loc(struct inode *inode, struct ext4_iloc *iloc)
44034403
!ext4_test_inode_state(inode, EXT4_STATE_XATTR));
44044404
}
44054405

4406-
static bool ext4_should_use_dax(struct inode *inode)
4406+
static bool ext4_should_enable_dax(struct inode *inode)
44074407
{
4408-
if (!test_opt(inode->i_sb, DAX))
4408+
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
4409+
4410+
if (test_opt2(inode->i_sb, DAX_NEVER))
44094411
return false;
44104412
if (!S_ISREG(inode->i_mode))
44114413
return false;
@@ -4417,14 +4419,21 @@ static bool ext4_should_use_dax(struct inode *inode)
44174419
return false;
44184420
if (ext4_test_inode_flag(inode, EXT4_INODE_VERITY))
44194421
return false;
4420-
return true;
4422+
if (!test_bit(EXT4_FLAGS_BDEV_IS_DAX, &sbi->s_ext4_flags))
4423+
return false;
4424+
if (test_opt(inode->i_sb, DAX_ALWAYS))
4425+
return true;
4426+
4427+
return ext4_test_inode_flag(inode, EXT4_INODE_DAX);
44214428
}
44224429

4423-
void ext4_set_inode_flags(struct inode *inode)
4430+
void ext4_set_inode_flags(struct inode *inode, bool init)
44244431
{
44254432
unsigned int flags = EXT4_I(inode)->i_flags;
44264433
unsigned int new_fl = 0;
44274434

4435+
WARN_ON_ONCE(IS_DAX(inode) && init);
4436+
44284437
if (flags & EXT4_SYNC_FL)
44294438
new_fl |= S_SYNC;
44304439
if (flags & EXT4_APPEND_FL)
@@ -4435,8 +4444,13 @@ void ext4_set_inode_flags(struct inode *inode)
44354444
new_fl |= S_NOATIME;
44364445
if (flags & EXT4_DIRSYNC_FL)
44374446
new_fl |= S_DIRSYNC;
4438-
if (ext4_should_use_dax(inode))
4447+
4448+
/* Because of the way inode_set_flags() works we must preserve S_DAX
4449+
* here if already set. */
4450+
new_fl |= (inode->i_flags & S_DAX);
4451+
if (init && ext4_should_enable_dax(inode))
44394452
new_fl |= S_DAX;
4453+
44404454
if (flags & EXT4_ENCRYPT_FL)
44414455
new_fl |= S_ENCRYPTED;
44424456
if (flags & EXT4_CASEFOLD_FL)
@@ -4650,7 +4664,7 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
46504664
* not initialized on a new filesystem. */
46514665
}
46524666
ei->i_flags = le32_to_cpu(raw_inode->i_flags);
4653-
ext4_set_inode_flags(inode);
4667+
ext4_set_inode_flags(inode, true);
46544668
inode->i_blocks = ext4_inode_blocks(raw_inode, ei);
46554669
ei->i_file_acl = le32_to_cpu(raw_inode->i_file_acl_lo);
46564670
if (ext4_has_feature_64bit(sb))

fs/ext4/ioctl.c

Lines changed: 54 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -292,6 +292,38 @@ static int ext4_ioctl_check_immutable(struct inode *inode, __u32 new_projid,
292292
return 0;
293293
}
294294

295+
static void ext4_dax_dontcache(struct inode *inode, unsigned int flags)
296+
{
297+
struct ext4_inode_info *ei = EXT4_I(inode);
298+
299+
if (S_ISDIR(inode->i_mode))
300+
return;
301+
302+
if (test_opt2(inode->i_sb, DAX_NEVER) ||
303+
test_opt(inode->i_sb, DAX_ALWAYS))
304+
return;
305+
306+
if ((ei->i_flags ^ flags) & EXT4_DAX_FL)
307+
d_mark_dontcache(inode);
308+
}
309+
310+
static bool dax_compatible(struct inode *inode, unsigned int oldflags,
311+
unsigned int flags)
312+
{
313+
if (flags & EXT4_DAX_FL) {
314+
if ((oldflags & EXT4_DAX_MUT_EXCL) ||
315+
ext4_test_inode_state(inode,
316+
EXT4_STATE_VERITY_IN_PROGRESS)) {
317+
return false;
318+
}
319+
}
320+
321+
if ((flags & EXT4_DAX_MUT_EXCL) && (oldflags & EXT4_DAX_FL))
322+
return false;
323+
324+
return true;
325+
}
326+
295327
static int ext4_ioctl_setflags(struct inode *inode,
296328
unsigned int flags)
297329
{
@@ -300,7 +332,6 @@ static int ext4_ioctl_setflags(struct inode *inode,
300332
int err = -EPERM, migrate = 0;
301333
struct ext4_iloc iloc;
302334
unsigned int oldflags, mask, i;
303-
unsigned int jflag;
304335
struct super_block *sb = inode->i_sb;
305336

306337
/* Is it quota file? Do not allow user to mess with it */
@@ -309,9 +340,6 @@ static int ext4_ioctl_setflags(struct inode *inode,
309340

310341
oldflags = ei->i_flags;
311342

312-
/* The JOURNAL_DATA flag is modifiable only by root */
313-
jflag = flags & EXT4_JOURNAL_DATA_FL;
314-
315343
err = vfs_ioc_setflags_prepare(inode, oldflags, flags);
316344
if (err)
317345
goto flags_out;
@@ -320,10 +348,16 @@ static int ext4_ioctl_setflags(struct inode *inode,
320348
* The JOURNAL_DATA flag can only be changed by
321349
* the relevant capability.
322350
*/
323-
if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL)) {
351+
if ((flags ^ oldflags) & (EXT4_JOURNAL_DATA_FL)) {
324352
if (!capable(CAP_SYS_RESOURCE))
325353
goto flags_out;
326354
}
355+
356+
if (!dax_compatible(inode, oldflags, flags)) {
357+
err = -EOPNOTSUPP;
358+
goto flags_out;
359+
}
360+
327361
if ((flags ^ oldflags) & EXT4_EXTENTS_FL)
328362
migrate = 1;
329363

@@ -369,6 +403,8 @@ static int ext4_ioctl_setflags(struct inode *inode,
369403
if (err)
370404
goto flags_err;
371405

406+
ext4_dax_dontcache(inode, flags);
407+
372408
for (i = 0, mask = 1; i < 32; i++, mask <<= 1) {
373409
if (!(mask & EXT4_FL_USER_MODIFIABLE))
374410
continue;
@@ -381,7 +417,8 @@ static int ext4_ioctl_setflags(struct inode *inode,
381417
ext4_clear_inode_flag(inode, i);
382418
}
383419

384-
ext4_set_inode_flags(inode);
420+
ext4_set_inode_flags(inode, false);
421+
385422
inode->i_ctime = current_time(inode);
386423

387424
err = ext4_mark_iloc_dirty(handle, inode, &iloc);
@@ -390,17 +427,18 @@ static int ext4_ioctl_setflags(struct inode *inode,
390427
if (err)
391428
goto flags_out;
392429

393-
if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL)) {
430+
if ((flags ^ oldflags) & (EXT4_JOURNAL_DATA_FL)) {
394431
/*
395432
* Changes to the journaling mode can cause unsafe changes to
396-
* S_DAX if we are using the DAX mount option.
433+
* S_DAX if the inode is DAX
397434
*/
398-
if (test_opt(inode->i_sb, DAX)) {
435+
if (IS_DAX(inode)) {
399436
err = -EBUSY;
400437
goto flags_out;
401438
}
402439

403-
err = ext4_change_inode_journal_flag(inode, jflag);
440+
err = ext4_change_inode_journal_flag(inode,
441+
flags & EXT4_JOURNAL_DATA_FL);
404442
if (err)
405443
goto flags_out;
406444
}
@@ -527,12 +565,15 @@ static inline __u32 ext4_iflags_to_xflags(unsigned long iflags)
527565
xflags |= FS_XFLAG_NOATIME;
528566
if (iflags & EXT4_PROJINHERIT_FL)
529567
xflags |= FS_XFLAG_PROJINHERIT;
568+
if (iflags & EXT4_DAX_FL)
569+
xflags |= FS_XFLAG_DAX;
530570
return xflags;
531571
}
532572

533573
#define EXT4_SUPPORTED_FS_XFLAGS (FS_XFLAG_SYNC | FS_XFLAG_IMMUTABLE | \
534574
FS_XFLAG_APPEND | FS_XFLAG_NODUMP | \
535-
FS_XFLAG_NOATIME | FS_XFLAG_PROJINHERIT)
575+
FS_XFLAG_NOATIME | FS_XFLAG_PROJINHERIT | \
576+
FS_XFLAG_DAX)
536577

537578
/* Transfer xflags flags to internal */
538579
static inline unsigned long ext4_xflags_to_iflags(__u32 xflags)
@@ -551,6 +592,8 @@ static inline unsigned long ext4_xflags_to_iflags(__u32 xflags)
551592
iflags |= EXT4_NOATIME_FL;
552593
if (xflags & FS_XFLAG_PROJINHERIT)
553594
iflags |= EXT4_PROJINHERIT_FL;
595+
if (xflags & FS_XFLAG_DAX)
596+
iflags |= EXT4_DAX_FL;
554597

555598
return iflags;
556599
}

fs/ext4/mballoc.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4708,7 +4708,7 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
47084708
}
47094709

47104710
ac->ac_op = EXT4_MB_HISTORY_PREALLOC;
4711-
seq = *this_cpu_ptr(&discard_pa_seq);
4711+
seq = this_cpu_read(discard_pa_seq);
47124712
if (!ext4_mb_use_preallocated(ac)) {
47134713
ac->ac_op = EXT4_MB_HISTORY_ALLOC;
47144714
ext4_mb_normalize_request(ac, ar);

0 commit comments

Comments
 (0)