Skip to content

Commit 15c981d

Browse files
committed
Merge tag 'for-5.7-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux
Pull btrfs updates from David Sterba: "A number of core changes that make things work better in general, code is simpler and cleaner. Core changes: - per-inode file extent tree, for in-memory tracking of contiguous extent ranges to make sure i_size adjustments are accurate - tree root structures are protected by reference counts, replacing SRCU that did not cover some cases - leak detector for tree root structures - per-transaction pinned extent tracking - buffer heads are replaced by bios for super block access - speedup of extent back reference resolution, on an example test scenario the runtime of send went down from an hour to minutes - factor out locking scheme used for subvolume writer and NOCOW exclusion, abstracted as DREW lock, double reader-writer exclusion (allow either readers or writers) - cleanup and abstract extent allocation policies, preparation for zoned device support - make reflink/clone_range work on inline extents - add more cancellation points for relocation, improves long response from 'balance cancel' - add page migration callback for data pages - switch to guid for uuids, with additional cleanups of the interface - make ranged full fsyncs more efficient - removal of obsolete ioctl flag BTRFS_SUBVOL_CREATE_ASYNC - remove b-tree readahead from delayed refs paths, avoiding seeks and reads of unnecessary blocks Features: - v2 of ioctl to delete subvolumes, allowing deletion by id and more future extensions Fixes: - fix qgroup rescan worker that could block umount - fix crash during unmount due to race with delayed inode workers - fix delalloc flushing logic that could create unnecessary chunks under heavy load - fix missing file extent item for hole after ranged fsync - several fixes in relocation error handling Other: - more documentation of relocation, device replace, space reservations - many random cleanups" * tag 'for-5.7-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: (210 commits) btrfs: fix missing semaphore unlock in 
btrfs_sync_file btrfs: use nofs allocations for running delayed items btrfs: sysfs: Use scnprintf() instead of snprintf() btrfs: do not resolve backrefs for roots that are being deleted btrfs: track reloc roots based on their commit root bytenr btrfs: restart relocate_tree_blocks properly btrfs: reloc: reorder reservation before root selection btrfs: do not readahead in build_backref_tree btrfs: do not use readahead for running delayed refs btrfs: Remove async_transid from btrfs_mksubvol/create_subvol/create_snapshot btrfs: Remove transid argument from btrfs_ioctl_snap_create_transid btrfs: Remove BTRFS_SUBVOL_CREATE_ASYNC support btrfs: kill the subvol_srcu btrfs: make btrfs_cleanup_fs_roots use the radix tree lock btrfs: don't take an extra root ref at allocation time btrfs: hold a ref on the root on the dead roots list btrfs: make inodes hold a ref on their roots btrfs: move the root freeing stuff into btrfs_put_root btrfs: move ino_cache_inode dropping out of btrfs_free_fs_root btrfs: make the extent buffer leak check per fs info ...
2 parents 1455c69 + 6ff0672 commit 15c981d

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

65 files changed

+4462
-3536
lines changed

fs/btrfs/Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
1111
compression.o delayed-ref.o relocation.o delayed-inode.o scrub.o \
1212
reada.o backref.o ulist.o qgroup.o send.o dev-replace.o raid56.o \
1313
uuid-tree.o props.o free-space-tree.o tree-checker.o space-info.o \
14-
block-rsv.o delalloc-space.o block-group.o discard.o
14+
block-rsv.o delalloc-space.o block-group.o discard.o reflink.o
1515

1616
btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o
1717
btrfs-$(CONFIG_BTRFS_FS_CHECK_INTEGRITY) += check-integrity.o

fs/btrfs/async-thread.c

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -395,3 +395,11 @@ void btrfs_set_work_high_priority(struct btrfs_work *work)
395395
{
396396
set_bit(WORK_HIGH_PRIO_BIT, &work->flags);
397397
}
398+
399+
void btrfs_flush_workqueue(struct btrfs_workqueue *wq)
400+
{
401+
if (wq->high)
402+
flush_workqueue(wq->high->normal_wq);
403+
404+
flush_workqueue(wq->normal->normal_wq);
405+
}

fs/btrfs/async-thread.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,5 +44,6 @@ void btrfs_set_work_high_priority(struct btrfs_work *work);
4444
struct btrfs_fs_info * __pure btrfs_work_owner(const struct btrfs_work *work);
4545
struct btrfs_fs_info * __pure btrfs_workqueue_owner(const struct __btrfs_workqueue *wq);
4646
bool btrfs_workqueue_normal_congested(const struct btrfs_workqueue *wq);
47+
void btrfs_flush_workqueue(struct btrfs_workqueue *wq);
4748

4849
#endif

fs/btrfs/backref.c

Lines changed: 114 additions & 71 deletions
Original file line numberDiff line numberDiff line change
@@ -347,33 +347,10 @@ static int add_prelim_ref(const struct btrfs_fs_info *fs_info,
347347
return -ENOMEM;
348348

349349
ref->root_id = root_id;
350-
if (key) {
350+
if (key)
351351
ref->key_for_search = *key;
352-
/*
353-
* We can often find data backrefs with an offset that is too
354-
* large (>= LLONG_MAX, maximum allowed file offset) due to
355-
* underflows when subtracting a file's offset with the data
356-
* offset of its corresponding extent data item. This can
357-
* happen for example in the clone ioctl.
358-
* So if we detect such case we set the search key's offset to
359-
* zero to make sure we will find the matching file extent item
360-
* at add_all_parents(), otherwise we will miss it because the
361-
* offset taken form the backref is much larger then the offset
362-
* of the file extent item. This can make us scan a very large
363-
* number of file extent items, but at least it will not make
364-
* us miss any.
365-
* This is an ugly workaround for a behaviour that should have
366-
* never existed, but it does and a fix for the clone ioctl
367-
* would touch a lot of places, cause backwards incompatibility
368-
* and would not fix the problem for extents cloned with older
369-
* kernels.
370-
*/
371-
if (ref->key_for_search.type == BTRFS_EXTENT_DATA_KEY &&
372-
ref->key_for_search.offset >= LLONG_MAX)
373-
ref->key_for_search.offset = 0;
374-
} else {
352+
else
375353
memset(&ref->key_for_search, 0, sizeof(ref->key_for_search));
376-
}
377354

378355
ref->inode_list = NULL;
379356
ref->level = level;
@@ -409,10 +386,36 @@ static int add_indirect_ref(const struct btrfs_fs_info *fs_info,
409386
wanted_disk_byte, count, sc, gfp_mask);
410387
}
411388

389+
static int is_shared_data_backref(struct preftrees *preftrees, u64 bytenr)
390+
{
391+
struct rb_node **p = &preftrees->direct.root.rb_root.rb_node;
392+
struct rb_node *parent = NULL;
393+
struct prelim_ref *ref = NULL;
394+
struct prelim_ref target = {0};
395+
int result;
396+
397+
target.parent = bytenr;
398+
399+
while (*p) {
400+
parent = *p;
401+
ref = rb_entry(parent, struct prelim_ref, rbnode);
402+
result = prelim_ref_compare(ref, &target);
403+
404+
if (result < 0)
405+
p = &(*p)->rb_left;
406+
else if (result > 0)
407+
p = &(*p)->rb_right;
408+
else
409+
return 1;
410+
}
411+
return 0;
412+
}
413+
412414
static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path,
413-
struct ulist *parents, struct prelim_ref *ref,
415+
struct ulist *parents,
416+
struct preftrees *preftrees, struct prelim_ref *ref,
414417
int level, u64 time_seq, const u64 *extent_item_pos,
415-
u64 total_refs, bool ignore_offset)
418+
bool ignore_offset)
416419
{
417420
int ret = 0;
418421
int slot;
@@ -424,6 +427,7 @@ static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path,
424427
u64 disk_byte;
425428
u64 wanted_disk_byte = ref->wanted_disk_byte;
426429
u64 count = 0;
430+
u64 data_offset;
427431

428432
if (level != 0) {
429433
eb = path->nodes[level];
@@ -434,18 +438,26 @@ static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path,
434438
}
435439

436440
/*
437-
* We normally enter this function with the path already pointing to
438-
* the first item to check. But sometimes, we may enter it with
439-
* slot==nritems. In that case, go to the next leaf before we continue.
441+
* 1. We normally enter this function with the path already pointing to
442+
* the first item to check. But sometimes, we may enter it with
443+
* slot == nritems.
444+
* 2. We are searching for normal backref but bytenr of this leaf
445+
* matches shared data backref
446+
* 3. The leaf owner is not equal to the root we are searching
447+
*
448+
* For these cases, go to the next leaf before we continue.
440449
*/
441-
if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
450+
eb = path->nodes[0];
451+
if (path->slots[0] >= btrfs_header_nritems(eb) ||
452+
is_shared_data_backref(preftrees, eb->start) ||
453+
ref->root_id != btrfs_header_owner(eb)) {
442454
if (time_seq == SEQ_LAST)
443455
ret = btrfs_next_leaf(root, path);
444456
else
445457
ret = btrfs_next_old_leaf(root, path, time_seq);
446458
}
447459

448-
while (!ret && count < total_refs) {
460+
while (!ret && count < ref->count) {
449461
eb = path->nodes[0];
450462
slot = path->slots[0];
451463

@@ -455,13 +467,31 @@ static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path,
455467
key.type != BTRFS_EXTENT_DATA_KEY)
456468
break;
457469

470+
/*
471+
* We are searching for normal backref but bytenr of this leaf
472+
* matches shared data backref, OR
473+
* the leaf owner is not equal to the root we are searching for
474+
*/
475+
if (slot == 0 &&
476+
(is_shared_data_backref(preftrees, eb->start) ||
477+
ref->root_id != btrfs_header_owner(eb))) {
478+
if (time_seq == SEQ_LAST)
479+
ret = btrfs_next_leaf(root, path);
480+
else
481+
ret = btrfs_next_old_leaf(root, path, time_seq);
482+
continue;
483+
}
458484
fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
459485
disk_byte = btrfs_file_extent_disk_bytenr(eb, fi);
486+
data_offset = btrfs_file_extent_offset(eb, fi);
460487

461488
if (disk_byte == wanted_disk_byte) {
462489
eie = NULL;
463490
old = NULL;
464-
count++;
491+
if (ref->key_for_search.offset == key.offset - data_offset)
492+
count++;
493+
else
494+
goto next;
465495
if (extent_item_pos) {
466496
ret = check_extent_in_eb(&key, eb, fi,
467497
*extent_item_pos,
@@ -502,33 +532,35 @@ static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path,
502532
*/
503533
static int resolve_indirect_ref(struct btrfs_fs_info *fs_info,
504534
struct btrfs_path *path, u64 time_seq,
535+
struct preftrees *preftrees,
505536
struct prelim_ref *ref, struct ulist *parents,
506-
const u64 *extent_item_pos, u64 total_refs,
507-
bool ignore_offset)
537+
const u64 *extent_item_pos, bool ignore_offset)
508538
{
509539
struct btrfs_root *root;
510540
struct btrfs_key root_key;
511541
struct extent_buffer *eb;
512542
int ret = 0;
513543
int root_level;
514544
int level = ref->level;
515-
int index;
545+
struct btrfs_key search_key = ref->key_for_search;
516546

517547
root_key.objectid = ref->root_id;
518548
root_key.type = BTRFS_ROOT_ITEM_KEY;
519549
root_key.offset = (u64)-1;
520550

521-
index = srcu_read_lock(&fs_info->subvol_srcu);
522-
523551
root = btrfs_get_fs_root(fs_info, &root_key, false);
524552
if (IS_ERR(root)) {
525-
srcu_read_unlock(&fs_info->subvol_srcu, index);
526553
ret = PTR_ERR(root);
554+
goto out_free;
555+
}
556+
557+
if (!path->search_commit_root &&
558+
test_bit(BTRFS_ROOT_DELETING, &root->state)) {
559+
ret = -ENOENT;
527560
goto out;
528561
}
529562

530563
if (btrfs_is_testing(fs_info)) {
531-
srcu_read_unlock(&fs_info->subvol_srcu, index);
532564
ret = -ENOENT;
533565
goto out;
534566
}
@@ -540,21 +572,36 @@ static int resolve_indirect_ref(struct btrfs_fs_info *fs_info,
540572
else
541573
root_level = btrfs_old_root_level(root, time_seq);
542574

543-
if (root_level + 1 == level) {
544-
srcu_read_unlock(&fs_info->subvol_srcu, index);
575+
if (root_level + 1 == level)
545576
goto out;
546-
}
547577

578+
/*
579+
* We can often find data backrefs with an offset that is too large
580+
* (>= LLONG_MAX, maximum allowed file offset) due to underflows when
581+
* subtracting a file's offset with the data offset of its
582+
* corresponding extent data item. This can happen for example in the
583+
* clone ioctl.
584+
*
585+
* So if we detect such case we set the search key's offset to zero to
586+
* make sure we will find the matching file extent item at
587+
* add_all_parents(), otherwise we will miss it because the offset
588+
* taken from the backref is much larger than the offset of the file
589+
* extent item. This can make us scan a very large number of file
590+
* extent items, but at least it will not make us miss any.
591+
*
592+
* This is an ugly workaround for a behaviour that should have never
593+
* existed, but it does and a fix for the clone ioctl would touch a lot
594+
* of places, cause backwards incompatibility and would not fix the
595+
* problem for extents cloned with older kernels.
596+
*/
597+
if (search_key.type == BTRFS_EXTENT_DATA_KEY &&
598+
search_key.offset >= LLONG_MAX)
599+
search_key.offset = 0;
548600
path->lowest_level = level;
549601
if (time_seq == SEQ_LAST)
550-
ret = btrfs_search_slot(NULL, root, &ref->key_for_search, path,
551-
0, 0);
602+
ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0);
552603
else
553-
ret = btrfs_search_old_slot(root, &ref->key_for_search, path,
554-
time_seq);
555-
556-
/* root node has been locked, we can release @subvol_srcu safely here */
557-
srcu_read_unlock(&fs_info->subvol_srcu, index);
604+
ret = btrfs_search_old_slot(root, &search_key, path, time_seq);
558605

559606
btrfs_debug(fs_info,
560607
"search slot in root %llu (level %d, ref count %d) returned %d for key (%llu %u %llu)",
@@ -574,9 +621,11 @@ static int resolve_indirect_ref(struct btrfs_fs_info *fs_info,
574621
eb = path->nodes[level];
575622
}
576623

577-
ret = add_all_parents(root, path, parents, ref, level, time_seq,
578-
extent_item_pos, total_refs, ignore_offset);
624+
ret = add_all_parents(root, path, parents, preftrees, ref, level,
625+
time_seq, extent_item_pos, ignore_offset);
579626
out:
627+
btrfs_put_root(root);
628+
out_free:
580629
path->lowest_level = 0;
581630
btrfs_release_path(path);
582631
return ret;
@@ -609,7 +658,7 @@ unode_aux_to_inode_list(struct ulist_node *node)
609658
static int resolve_indirect_refs(struct btrfs_fs_info *fs_info,
610659
struct btrfs_path *path, u64 time_seq,
611660
struct preftrees *preftrees,
612-
const u64 *extent_item_pos, u64 total_refs,
661+
const u64 *extent_item_pos,
613662
struct share_check *sc, bool ignore_offset)
614663
{
615664
int err;
@@ -653,9 +702,9 @@ static int resolve_indirect_refs(struct btrfs_fs_info *fs_info,
653702
ret = BACKREF_FOUND_SHARED;
654703
goto out;
655704
}
656-
err = resolve_indirect_ref(fs_info, path, time_seq, ref,
657-
parents, extent_item_pos,
658-
total_refs, ignore_offset);
705+
err = resolve_indirect_ref(fs_info, path, time_seq, preftrees,
706+
ref, parents, extent_item_pos,
707+
ignore_offset);
659708
/*
660709
* we can only tolerate ENOENT, otherwise, we should catch error
661710
* and return directly.
@@ -758,8 +807,7 @@ static int add_missing_keys(struct btrfs_fs_info *fs_info,
758807
*/
759808
static int add_delayed_refs(const struct btrfs_fs_info *fs_info,
760809
struct btrfs_delayed_ref_head *head, u64 seq,
761-
struct preftrees *preftrees, u64 *total_refs,
762-
struct share_check *sc)
810+
struct preftrees *preftrees, struct share_check *sc)
763811
{
764812
struct btrfs_delayed_ref_node *node;
765813
struct btrfs_delayed_extent_op *extent_op = head->extent_op;
@@ -793,7 +841,6 @@ static int add_delayed_refs(const struct btrfs_fs_info *fs_info,
793841
default:
794842
BUG();
795843
}
796-
*total_refs += count;
797844
switch (node->type) {
798845
case BTRFS_TREE_BLOCK_REF_KEY: {
799846
/* NORMAL INDIRECT METADATA backref */
@@ -876,7 +923,7 @@ static int add_delayed_refs(const struct btrfs_fs_info *fs_info,
876923
static int add_inline_refs(const struct btrfs_fs_info *fs_info,
877924
struct btrfs_path *path, u64 bytenr,
878925
int *info_level, struct preftrees *preftrees,
879-
u64 *total_refs, struct share_check *sc)
926+
struct share_check *sc)
880927
{
881928
int ret = 0;
882929
int slot;
@@ -900,7 +947,6 @@ static int add_inline_refs(const struct btrfs_fs_info *fs_info,
900947

901948
ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
902949
flags = btrfs_extent_flags(leaf, ei);
903-
*total_refs += btrfs_extent_refs(leaf, ei);
904950
btrfs_item_key_to_cpu(leaf, &found_key, slot);
905951

906952
ptr = (unsigned long)(ei + 1);
@@ -1125,8 +1171,6 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans,
11251171
struct prelim_ref *ref;
11261172
struct rb_node *node;
11271173
struct extent_inode_elem *eie = NULL;
1128-
/* total of both direct AND indirect refs! */
1129-
u64 total_refs = 0;
11301174
struct preftrees preftrees = {
11311175
.direct = PREFTREE_INIT,
11321176
.indirect = PREFTREE_INIT,
@@ -1195,7 +1239,7 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans,
11951239
}
11961240
spin_unlock(&delayed_refs->lock);
11971241
ret = add_delayed_refs(fs_info, head, time_seq,
1198-
&preftrees, &total_refs, sc);
1242+
&preftrees, sc);
11991243
mutex_unlock(&head->mutex);
12001244
if (ret)
12011245
goto out;
@@ -1216,8 +1260,7 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans,
12161260
(key.type == BTRFS_EXTENT_ITEM_KEY ||
12171261
key.type == BTRFS_METADATA_ITEM_KEY)) {
12181262
ret = add_inline_refs(fs_info, path, bytenr,
1219-
&info_level, &preftrees,
1220-
&total_refs, sc);
1263+
&info_level, &preftrees, sc);
12211264
if (ret)
12221265
goto out;
12231266
ret = add_keyed_refs(fs_info, path, bytenr, info_level,
@@ -1236,7 +1279,7 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans,
12361279
WARN_ON(!RB_EMPTY_ROOT(&preftrees.indirect_missing_keys.root.rb_root));
12371280

12381281
ret = resolve_indirect_refs(fs_info, path, time_seq, &preftrees,
1239-
extent_item_pos, total_refs, sc, ignore_offset);
1282+
extent_item_pos, sc, ignore_offset);
12401283
if (ret)
12411284
goto out;
12421285

@@ -1362,10 +1405,10 @@ static void free_leaf_list(struct ulist *blocks)
13621405
*
13631406
* returns 0 on success, <0 on error
13641407
*/
1365-
static int btrfs_find_all_leafs(struct btrfs_trans_handle *trans,
1366-
struct btrfs_fs_info *fs_info, u64 bytenr,
1367-
u64 time_seq, struct ulist **leafs,
1368-
const u64 *extent_item_pos, bool ignore_offset)
1408+
int btrfs_find_all_leafs(struct btrfs_trans_handle *trans,
1409+
struct btrfs_fs_info *fs_info, u64 bytenr,
1410+
u64 time_seq, struct ulist **leafs,
1411+
const u64 *extent_item_pos, bool ignore_offset)
13691412
{
13701413
int ret;
13711414

fs/btrfs/backref.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,10 @@ int iterate_inodes_from_logical(u64 logical, struct btrfs_fs_info *fs_info,
4040

4141
int paths_from_inode(u64 inum, struct inode_fs_paths *ipath);
4242

43+
int btrfs_find_all_leafs(struct btrfs_trans_handle *trans,
44+
struct btrfs_fs_info *fs_info, u64 bytenr,
45+
u64 time_seq, struct ulist **leafs,
46+
const u64 *extent_item_pos, bool ignore_offset);
4347
int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
4448
struct btrfs_fs_info *fs_info, u64 bytenr,
4549
u64 time_seq, struct ulist **roots, bool ignore_offset);

0 commit comments

Comments
 (0)