Skip to content

Commit 87045e6

Browse files
committed
Merge tag 'for-5.15-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux
Pull btrfs updates from David Sterba:

"The highlights of this round are integrations with fs-verity and idmapped mounts, the rest is usual mix of minor improvements, speedups and cleanups.

There are some patches outside of btrfs, namely updating some VFS interfaces, all straightforward and acked.

Features:

- fs-verity support, using standard ioctls, backward compatible with read-only limitation on inodes with previously enabled fs-verity
- idmapped mount support
- make mount with rescue=ibadroots more tolerant to partially damaged trees
- allow raid0 on a single device and raid10 on two devices, degenerate cases but might be useful as an intermediate step during conversion to other profiles
- zoned mode block group auto reclaim can be disabled via sysfs knob

Performance improvements:

- continue readahead of node siblings even if target node is in memory, could speed up full send (on sample test +11%)
- batching of delayed items can speed up creating many files
- fsync/tree-log speedups
  - avoid unnecessary work (gains +2% throughput, -2% run time on sample load)
  - reduced lock contention on renames (on dbench +4% throughput, up to -30% latency)

Fixes:

- various zoned mode fixes
- preemptive flushing threshold tuning, avoid excessive work on almost full filesystems

Core:

- continued subpage support, preparation for implementing remaining features like compression and defragmentation; with some limitations, write is now enabled on 64K page systems with 4K sectors, still considered experimental
  - no readahead on compressed reads
  - inline extents disabled
  - disabled raid56 profile conversion and mount
- improved flushing logic, fixing early ENOSPC on some workloads
- inode flags have been internally split to read-only and read-write incompat bit parts, used by fs-verity
- new tree items for fs-verity
  - descriptor item
  - Merkle tree item
- inode operations extended to be namespace-aware
- cleanups and refactoring

Generic code changes:

- fs: new export filemap_fdatawrite_wbc
- fs: removed sync_inode
- block: bio_trim argument type fixups
- vfs: add namespace-aware lookup"

* tag 'for-5.15-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: (114 commits)
  btrfs: reset replace target device to allocation state on close
  btrfs: zoned: fix ordered extent boundary calculation
  btrfs: do not do preemptive flushing if the majority is global rsv
  btrfs: reduce the preemptive flushing threshold to 90%
  btrfs: tree-log: check btrfs_lookup_data_extent return value
  btrfs: avoid unnecessarily logging directories that had no changes
  btrfs: allow idmapped mount
  btrfs: handle ACLs on idmapped mounts
  btrfs: allow idmapped INO_LOOKUP_USER ioctl
  btrfs: allow idmapped SUBVOL_SETFLAGS ioctl
  btrfs: allow idmapped SET_RECEIVED_SUBVOL ioctls
  btrfs: relax restrictions for SNAP_DESTROY_V2 with subvolids
  btrfs: allow idmapped SNAP_DESTROY ioctls
  btrfs: allow idmapped SNAP_CREATE/SUBVOL_CREATE ioctls
  btrfs: check whether fsgid/fsuid are mapped during subvolume creation
  btrfs: allow idmapped permission inode op
  btrfs: allow idmapped setattr inode op
  btrfs: allow idmapped tmpfile inode op
  btrfs: allow idmapped symlink inode op
  btrfs: allow idmapped mkdir inode op
  ...
2 parents 9c849ce + 0d977e0 commit 87045e6

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

58 files changed

+2769
-1425
lines changed

block/bio.c

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1544,12 +1544,15 @@ EXPORT_SYMBOL(bio_split);
15441544
* @bio: bio to trim
15451545
* @offset: number of sectors to trim from the front of @bio
15461546
* @size: size we want to trim @bio to, in sectors
1547+
*
1548+
* This function is typically used for bios that are cloned and submitted
1549+
* to the underlying device in parts.
15471550
*/
1548-
void bio_trim(struct bio *bio, int offset, int size)
1551+
void bio_trim(struct bio *bio, sector_t offset, sector_t size)
15491552
{
1550-
/* 'bio' is a cloned bio which we need to trim to match
1551-
* the given offset and size.
1552-
*/
1553+
if (WARN_ON_ONCE(offset > BIO_MAX_SECTORS || size > BIO_MAX_SECTORS ||
1554+
offset + size > bio->bi_iter.bi_size))
1555+
return;
15531556

15541557
size <<= 9;
15551558
if (offset == 0 && size == bio->bi_iter.bi_size)
@@ -1560,7 +1563,6 @@ void bio_trim(struct bio *bio, int offset, int size)
15601563

15611564
if (bio_integrity(bio))
15621565
bio_integrity_trim(bio);
1563-
15641566
}
15651567
EXPORT_SYMBOL_GPL(bio_trim);
15661568

fs/9p/vfs_file.c

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -612,12 +612,7 @@ static void v9fs_mmap_vm_close(struct vm_area_struct *vma)
612612
p9_debug(P9_DEBUG_VFS, "9p VMA close, %p, flushing", vma);
613613

614614
inode = file_inode(vma->vm_file);
615-
616-
if (!mapping_can_writeback(inode->i_mapping))
617-
wbc.nr_to_write = 0;
618-
619-
might_sleep();
620-
sync_inode(inode, &wbc);
615+
filemap_fdatawrite_wbc(inode->i_mapping, &wbc);
621616
}
622617

623618

fs/btrfs/Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o
3636
btrfs-$(CONFIG_BTRFS_FS_CHECK_INTEGRITY) += check-integrity.o
3737
btrfs-$(CONFIG_BTRFS_FS_REF_VERIFY) += ref-verify.o
3838
btrfs-$(CONFIG_BLK_DEV_ZONED) += zoned.o
39+
btrfs-$(CONFIG_FS_VERITY) += verity.o
3940

4041
btrfs-$(CONFIG_BTRFS_FS_RUN_SANITY_TESTS) += tests/free-space-tests.o \
4142
tests/extent-buffer-tests.o tests/btrfs-tests.o \

fs/btrfs/acl.c

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,8 @@ struct posix_acl *btrfs_get_acl(struct inode *inode, int type)
5353
}
5454

5555
static int __btrfs_set_acl(struct btrfs_trans_handle *trans,
56-
struct inode *inode, struct posix_acl *acl, int type)
56+
struct user_namespace *mnt_userns,
57+
struct inode *inode, struct posix_acl *acl, int type)
5758
{
5859
int ret, size = 0;
5960
const char *name;
@@ -114,12 +115,12 @@ int btrfs_set_acl(struct user_namespace *mnt_userns, struct inode *inode,
114115
umode_t old_mode = inode->i_mode;
115116

116117
if (type == ACL_TYPE_ACCESS && acl) {
117-
ret = posix_acl_update_mode(&init_user_ns, inode,
118+
ret = posix_acl_update_mode(mnt_userns, inode,
118119
&inode->i_mode, &acl);
119120
if (ret)
120121
return ret;
121122
}
122-
ret = __btrfs_set_acl(NULL, inode, acl, type);
123+
ret = __btrfs_set_acl(NULL, mnt_userns, inode, acl, type);
123124
if (ret)
124125
inode->i_mode = old_mode;
125126
return ret;
@@ -140,14 +141,14 @@ int btrfs_init_acl(struct btrfs_trans_handle *trans,
140141
return ret;
141142

142143
if (default_acl) {
143-
ret = __btrfs_set_acl(trans, inode, default_acl,
144+
ret = __btrfs_set_acl(trans, &init_user_ns, inode, default_acl,
144145
ACL_TYPE_DEFAULT);
145146
posix_acl_release(default_acl);
146147
}
147148

148149
if (acl) {
149150
if (!ret)
150-
ret = __btrfs_set_acl(trans, inode, acl,
151+
ret = __btrfs_set_acl(trans, &init_user_ns, inode, acl,
151152
ACL_TYPE_ACCESS);
152153
posix_acl_release(acl);
153154
}

fs/btrfs/backref.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1211,7 +1211,7 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans,
12111211
again:
12121212
head = NULL;
12131213

1214-
ret = btrfs_search_slot(trans, fs_info->extent_root, &key, path, 0, 0);
1214+
ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, path, 0, 0);
12151215
if (ret < 0)
12161216
goto out;
12171217
BUG_ON(ret == 0);
@@ -1488,14 +1488,14 @@ static int btrfs_find_all_roots_safe(struct btrfs_trans_handle *trans,
14881488
int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
14891489
struct btrfs_fs_info *fs_info, u64 bytenr,
14901490
u64 time_seq, struct ulist **roots,
1491-
bool ignore_offset, bool skip_commit_root_sem)
1491+
bool skip_commit_root_sem)
14921492
{
14931493
int ret;
14941494

14951495
if (!trans && !skip_commit_root_sem)
14961496
down_read(&fs_info->commit_root_sem);
14971497
ret = btrfs_find_all_roots_safe(trans, fs_info, bytenr,
1498-
time_seq, roots, ignore_offset);
1498+
time_seq, roots, false);
14991499
if (!trans && !skip_commit_root_sem)
15001500
up_read(&fs_info->commit_root_sem);
15011501
return ret;

fs/btrfs/backref.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ int btrfs_find_all_leafs(struct btrfs_trans_handle *trans,
4747
const u64 *extent_item_pos, bool ignore_offset);
4848
int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
4949
struct btrfs_fs_info *fs_info, u64 bytenr,
50-
u64 time_seq, struct ulist **roots, bool ignore_offset,
50+
u64 time_seq, struct ulist **roots,
5151
bool skip_commit_root_sem);
5252
char *btrfs_ref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
5353
u32 name_len, unsigned long name_off,

fs/btrfs/block-group.c

Lines changed: 111 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1561,7 +1561,7 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
15611561
div64_u64(zone_unusable * 100, bg->length));
15621562
trace_btrfs_reclaim_block_group(bg);
15631563
ret = btrfs_relocate_chunk(fs_info, bg->start);
1564-
if (ret)
1564+
if (ret && ret != -EAGAIN)
15651565
btrfs_err(fs_info, "error relocating chunk %llu",
15661566
bg->start);
15671567

@@ -2105,11 +2105,22 @@ static int fill_dummy_bgs(struct btrfs_fs_info *fs_info)
21052105
bg->used = em->len;
21062106
bg->flags = map->type;
21072107
ret = btrfs_add_block_group_cache(fs_info, bg);
2108+
/*
2109+
* We may have some valid block group cache added already, in
2110+
* that case we skip to the next one.
2111+
*/
2112+
if (ret == -EEXIST) {
2113+
ret = 0;
2114+
btrfs_put_block_group(bg);
2115+
continue;
2116+
}
2117+
21082118
if (ret) {
21092119
btrfs_remove_free_space_cache(bg);
21102120
btrfs_put_block_group(bg);
21112121
break;
21122122
}
2123+
21132124
btrfs_update_space_info(fs_info, bg->flags, em->len, em->len,
21142125
0, 0, &space_info);
21152126
bg->space_info = space_info;
@@ -2212,6 +2223,14 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info)
22122223
ret = check_chunk_block_group_mappings(info);
22132224
error:
22142225
btrfs_free_path(path);
2226+
/*
2227+
* We've hit some error while reading the extent tree, and have
2228+
* rescue=ibadroots mount option.
2229+
* Try to fill the tree using dummy block groups so that the user can
2230+
* continue to mount and grab their data.
2231+
*/
2232+
if (ret && btrfs_test_opt(info, IGNOREBADROOTS))
2233+
ret = fill_dummy_bgs(info);
22152234
return ret;
22162235
}
22172236

@@ -2244,6 +2263,95 @@ static int insert_block_group_item(struct btrfs_trans_handle *trans,
22442263
return btrfs_insert_item(trans, root, &key, &bgi, sizeof(bgi));
22452264
}
22462265

2266+
static int insert_dev_extent(struct btrfs_trans_handle *trans,
2267+
struct btrfs_device *device, u64 chunk_offset,
2268+
u64 start, u64 num_bytes)
2269+
{
2270+
struct btrfs_fs_info *fs_info = device->fs_info;
2271+
struct btrfs_root *root = fs_info->dev_root;
2272+
struct btrfs_path *path;
2273+
struct btrfs_dev_extent *extent;
2274+
struct extent_buffer *leaf;
2275+
struct btrfs_key key;
2276+
int ret;
2277+
2278+
WARN_ON(!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state));
2279+
WARN_ON(test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state));
2280+
path = btrfs_alloc_path();
2281+
if (!path)
2282+
return -ENOMEM;
2283+
2284+
key.objectid = device->devid;
2285+
key.type = BTRFS_DEV_EXTENT_KEY;
2286+
key.offset = start;
2287+
ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(*extent));
2288+
if (ret)
2289+
goto out;
2290+
2291+
leaf = path->nodes[0];
2292+
extent = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dev_extent);
2293+
btrfs_set_dev_extent_chunk_tree(leaf, extent, BTRFS_CHUNK_TREE_OBJECTID);
2294+
btrfs_set_dev_extent_chunk_objectid(leaf, extent,
2295+
BTRFS_FIRST_CHUNK_TREE_OBJECTID);
2296+
btrfs_set_dev_extent_chunk_offset(leaf, extent, chunk_offset);
2297+
2298+
btrfs_set_dev_extent_length(leaf, extent, num_bytes);
2299+
btrfs_mark_buffer_dirty(leaf);
2300+
out:
2301+
btrfs_free_path(path);
2302+
return ret;
2303+
}
2304+
2305+
/*
2306+
* This function belongs to phase 2.
2307+
*
2308+
* See the comment at btrfs_chunk_alloc() for details about the chunk allocation
2309+
* phases.
2310+
*/
2311+
static int insert_dev_extents(struct btrfs_trans_handle *trans,
2312+
u64 chunk_offset, u64 chunk_size)
2313+
{
2314+
struct btrfs_fs_info *fs_info = trans->fs_info;
2315+
struct btrfs_device *device;
2316+
struct extent_map *em;
2317+
struct map_lookup *map;
2318+
u64 dev_offset;
2319+
u64 stripe_size;
2320+
int i;
2321+
int ret = 0;
2322+
2323+
em = btrfs_get_chunk_map(fs_info, chunk_offset, chunk_size);
2324+
if (IS_ERR(em))
2325+
return PTR_ERR(em);
2326+
2327+
map = em->map_lookup;
2328+
stripe_size = em->orig_block_len;
2329+
2330+
/*
2331+
* Take the device list mutex to prevent races with the final phase of
2332+
* a device replace operation that replaces the device object associated
2333+
* with the map's stripes, because the device object's id can change
2334+
* at any time during that final phase of the device replace operation
2335+
* (dev-replace.c:btrfs_dev_replace_finishing()), so we could grab the
2336+
* replaced device and then see it with an ID of BTRFS_DEV_REPLACE_DEVID,
2337+
* resulting in persisting a device extent item with such ID.
2338+
*/
2339+
mutex_lock(&fs_info->fs_devices->device_list_mutex);
2340+
for (i = 0; i < map->num_stripes; i++) {
2341+
device = map->stripes[i].dev;
2342+
dev_offset = map->stripes[i].physical;
2343+
2344+
ret = insert_dev_extent(trans, device, chunk_offset, dev_offset,
2345+
stripe_size);
2346+
if (ret)
2347+
break;
2348+
}
2349+
mutex_unlock(&fs_info->fs_devices->device_list_mutex);
2350+
2351+
free_extent_map(em);
2352+
return ret;
2353+
}
2354+
22472355
/*
22482356
* This function, btrfs_create_pending_block_groups(), belongs to the phase 2 of
22492357
* chunk allocation.
@@ -2278,8 +2386,8 @@ void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans)
22782386
if (ret)
22792387
btrfs_abort_transaction(trans, ret);
22802388
}
2281-
ret = btrfs_finish_chunk_alloc(trans, block_group->start,
2282-
block_group->length);
2389+
ret = insert_dev_extents(trans, block_group->start,
2390+
block_group->length);
22832391
if (ret)
22842392
btrfs_abort_transaction(trans, ret);
22852393
add_block_group_free_space(trans, block_group);

fs/btrfs/btrfs_inode.h

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,13 @@ enum {
5151
* the file range, inode's io_tree).
5252
*/
5353
BTRFS_INODE_NO_DELALLOC_FLUSH,
54+
/*
55+
* Set when we are working on enabling verity for a file. Computing and
56+
* writing the whole Merkle tree can take a while so we want to prevent
57+
* races where two separate tasks attempt to simultaneously start verity
58+
* on the same file.
59+
*/
60+
BTRFS_INODE_VERITY_IN_PROGRESS,
5461
};
5562

5663
/* in memory btrfs inode */
@@ -189,8 +196,10 @@ struct btrfs_inode {
189196
*/
190197
u64 csum_bytes;
191198

192-
/* flags field from the on disk inode */
199+
/* Backwards incompatible flags, lower half of inode_item::flags */
193200
u32 flags;
201+
/* Read-only compatibility flags, upper half of inode_item::flags */
202+
u32 ro_flags;
194203

195204
/*
196205
* Counters to keep track of the number of extent item's we may use due
@@ -348,6 +357,22 @@ struct btrfs_dio_private {
348357
u8 csums[];
349358
};
350359

360+
/*
361+
* btrfs_inode_item stores flags in a u64, btrfs_inode stores them in two
362+
* separate u32s. These two functions convert between the two representations.
363+
*/
364+
static inline u64 btrfs_inode_combine_flags(u32 flags, u32 ro_flags)
365+
{
366+
return (flags | ((u64)ro_flags << 32));
367+
}
368+
369+
static inline void btrfs_inode_split_flags(u64 inode_item_flags,
370+
u32 *flags, u32 *ro_flags)
371+
{
372+
*flags = (u32)inode_item_flags;
373+
*ro_flags = (u32)(inode_item_flags >> 32);
374+
}
375+
351376
/* Array of bytes with variable length, hexadecimal format 0x1234 */
352377
#define CSUM_FMT "0x%*phN"
353378
#define CSUM_FMT_VALUE(size, bytes) size, bytes

0 commit comments

Comments
 (0)