@@ -9078,9 +9078,9 @@ static ssize_t btrfs_encoded_read_inline(
 }
 
 struct btrfs_encoded_read_private {
-	wait_queue_head_t wait;
+	struct completion done;
 	void *uring_ctx;
-	atomic_t pending;
+	refcount_t pending_refs;
 	blk_status_t status;
 };
 
@@ -9099,14 +9099,14 @@ static void btrfs_encoded_read_endio(struct btrfs_bio *bbio)
 		 */
 		WRITE_ONCE(priv->status, bbio->bio.bi_status);
 	}
-	if (atomic_dec_and_test(&priv->pending)) {
+	if (refcount_dec_and_test(&priv->pending_refs)) {
 		int err = blk_status_to_errno(READ_ONCE(priv->status));
 
 		if (priv->uring_ctx) {
 			btrfs_uring_read_extent_endio(priv->uring_ctx, err);
 			kfree(priv);
 		} else {
-			wake_up(&priv->wait);
+			complete(&priv->done);
 		}
 	}
 	bio_put(&bbio->bio);
@@ -9126,8 +9126,8 @@ int btrfs_encoded_read_regular_fill_pages(struct btrfs_inode *inode,
 	if (!priv)
 		return -ENOMEM;
 
-	init_waitqueue_head(&priv->wait);
-	atomic_set(&priv->pending, 1);
+	init_completion(&priv->done);
+	refcount_set(&priv->pending_refs, 1);
 	priv->status = 0;
 	priv->uring_ctx = uring_ctx;
 
@@ -9140,7 +9140,7 @@ int btrfs_encoded_read_regular_fill_pages(struct btrfs_inode *inode,
 		size_t bytes = min_t(u64, disk_io_size, PAGE_SIZE);
 
 		if (bio_add_page(&bbio->bio, pages[i], bytes, 0) < bytes) {
-			atomic_inc(&priv->pending);
+			refcount_inc(&priv->pending_refs);
 			btrfs_submit_bbio(bbio, 0);
 
 			bbio = btrfs_bio_alloc(BIO_MAX_VECS, REQ_OP_READ, fs_info,
@@ -9155,11 +9155,11 @@ int btrfs_encoded_read_regular_fill_pages(struct btrfs_inode *inode,
 		disk_io_size -= bytes;
 	} while (disk_io_size);
 
-	atomic_inc(&priv->pending);
+	refcount_inc(&priv->pending_refs);
 	btrfs_submit_bbio(bbio, 0);
 
 	if (uring_ctx) {
-		if (atomic_dec_return(&priv->pending) == 0) {
+		if (refcount_dec_and_test(&priv->pending_refs)) {
 			ret = blk_status_to_errno(READ_ONCE(priv->status));
 			btrfs_uring_read_extent_endio(uring_ctx, ret);
 			kfree(priv);
@@ -9168,8 +9168,8 @@ int btrfs_encoded_read_regular_fill_pages(struct btrfs_inode *inode,
 
 		return -EIOCBQUEUED;
 	} else {
-		if (atomic_dec_return(&priv->pending) != 0)
-			io_wait_event(priv->wait, !atomic_read(&priv->pending));
+		if (!refcount_dec_and_test(&priv->pending_refs))
+			wait_for_completion_io(&priv->done);
 		/* See btrfs_encoded_read_endio() for ordering. */
 		ret = blk_status_to_errno(READ_ONCE(priv->status));
 		kfree(priv);
@@ -9799,15 +9799,25 @@ static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file,
 	struct btrfs_fs_info *fs_info = root->fs_info;
 	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
 	struct extent_state *cached_state = NULL;
-	struct extent_map *em = NULL;
 	struct btrfs_chunk_map *map = NULL;
 	struct btrfs_device *device = NULL;
 	struct btrfs_swap_info bsi = {
 		.lowest_ppage = (sector_t)-1ULL,
 	};
+	struct btrfs_backref_share_check_ctx *backref_ctx = NULL;
+	struct btrfs_path *path = NULL;
 	int ret = 0;
 	u64 isize;
-	u64 start;
+	u64 prev_extent_end = 0;
+
+	/*
+	 * Acquire the inode's mmap lock to prevent races with memory mapped
+	 * writes, as they could happen after we flush delalloc below and before
+	 * we lock the extent range further below. The inode was already locked
+	 * up in the call chain.
+	 */
+	btrfs_assert_inode_locked(BTRFS_I(inode));
+	down_write(&BTRFS_I(inode)->i_mmap_lock);
 
 	/*
 	 * If the swap file was just created, make sure delalloc is done. If the
@@ -9816,22 +9826,32 @@ static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file,
 	 */
 	ret = btrfs_wait_ordered_range(BTRFS_I(inode), 0, (u64)-1);
 	if (ret)
-		return ret;
+		goto out_unlock_mmap;
 
 	/*
 	 * The inode is locked, so these flags won't change after we check them.
 	 */
 	if (BTRFS_I(inode)->flags & BTRFS_INODE_COMPRESS) {
 		btrfs_warn(fs_info, "swapfile must not be compressed");
-		return -EINVAL;
+		ret = -EINVAL;
+		goto out_unlock_mmap;
 	}
 	if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW)) {
 		btrfs_warn(fs_info, "swapfile must not be copy-on-write");
-		return -EINVAL;
+		ret = -EINVAL;
+		goto out_unlock_mmap;
 	}
 	if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) {
 		btrfs_warn(fs_info, "swapfile must not be checksummed");
-		return -EINVAL;
+		ret = -EINVAL;
+		goto out_unlock_mmap;
+	}
+
+	path = btrfs_alloc_path();
+	backref_ctx = btrfs_alloc_backref_share_check_ctx();
+	if (!path || !backref_ctx) {
+		ret = -ENOMEM;
+		goto out_unlock_mmap;
 	}
 
 	/*
@@ -9846,7 +9866,8 @@ static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file,
 	if (!btrfs_exclop_start(fs_info, BTRFS_EXCLOP_SWAP_ACTIVATE)) {
 		btrfs_warn(fs_info,
 			   "cannot activate swapfile while exclusive operation is running");
-		return -EBUSY;
+		ret = -EBUSY;
+		goto out_unlock_mmap;
 	}
 
 	/*
@@ -9860,7 +9881,8 @@ static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file,
 		btrfs_exclop_finish(fs_info);
 		btrfs_warn(fs_info,
 			   "cannot activate swapfile because snapshot creation is in progress");
-		return -EINVAL;
+		ret = -EINVAL;
+		goto out_unlock_mmap;
 	}
 	/*
 	 * Snapshots can create extents which require COW even if NODATACOW is
@@ -9881,32 +9903,48 @@ static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file,
 		btrfs_warn(fs_info,
 			   "cannot activate swapfile because subvolume %llu is being deleted",
 			   btrfs_root_id(root));
-		return -EPERM;
+		ret = -EPERM;
+		goto out_unlock_mmap;
 	}
 	atomic_inc(&root->nr_swapfiles);
 	spin_unlock(&root->root_item_lock);
 
 	isize = ALIGN_DOWN(inode->i_size, fs_info->sectorsize);
 
 	lock_extent(io_tree, 0, isize - 1, &cached_state);
-	start = 0;
-	while (start < isize) {
-		u64 logical_block_start, physical_block_start;
+	while (prev_extent_end < isize) {
+		struct btrfs_key key;
+		struct extent_buffer *leaf;
+		struct btrfs_file_extent_item *ei;
 		struct btrfs_block_group *bg;
-		u64 len = isize - start;
+		u64 logical_block_start;
+		u64 physical_block_start;
+		u64 extent_gen;
+		u64 disk_bytenr;
+		u64 len;
 
-		em = btrfs_get_extent(BTRFS_I(inode), NULL, start, len);
-		if (IS_ERR(em)) {
-			ret = PTR_ERR(em);
+		key.objectid = btrfs_ino(BTRFS_I(inode));
+		key.type = BTRFS_EXTENT_DATA_KEY;
+		key.offset = prev_extent_end;
+
+		ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+		if (ret < 0)
 			goto out;
-		}
 
-		if (em->disk_bytenr == EXTENT_MAP_HOLE) {
+		/*
+		 * If key not found it means we have an implicit hole (NO_HOLES
+		 * is enabled).
+		 */
+		if (ret > 0) {
 			btrfs_warn(fs_info, "swapfile must not have holes");
 			ret = -EINVAL;
 			goto out;
 		}
-		if (em->disk_bytenr == EXTENT_MAP_INLINE) {
+
+		leaf = path->nodes[0];
+		ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_file_extent_item);
+
+		if (btrfs_file_extent_type(leaf, ei) == BTRFS_FILE_EXTENT_INLINE) {
 			/*
 			 * It's unlikely we'll ever actually find ourselves
 			 * here, as a file small enough to fit inline won't be
@@ -9918,23 +9956,45 @@ static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file,
 			ret = -EINVAL;
 			goto out;
 		}
-		if (extent_map_is_compressed(em)) {
+
+		if (btrfs_file_extent_compression(leaf, ei) != BTRFS_COMPRESS_NONE) {
 			btrfs_warn(fs_info, "swapfile must not be compressed");
 			ret = -EINVAL;
 			goto out;
 		}
 
-		logical_block_start = extent_map_block_start(em) + (start - em->start);
-		len = min(len, em->len - (start - em->start));
-		free_extent_map(em);
-		em = NULL;
+		disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, ei);
+		if (disk_bytenr == 0) {
+			btrfs_warn(fs_info, "swapfile must not have holes");
+			ret = -EINVAL;
+			goto out;
+		}
+
+		logical_block_start = disk_bytenr + btrfs_file_extent_offset(leaf, ei);
+		extent_gen = btrfs_file_extent_generation(leaf, ei);
+		prev_extent_end = btrfs_file_extent_end(path);
 
-		ret = can_nocow_extent(inode, start, &len, NULL, false, true);
+		if (prev_extent_end > isize)
+			len = isize - key.offset;
+		else
+			len = btrfs_file_extent_num_bytes(leaf, ei);
+
+		backref_ctx->curr_leaf_bytenr = leaf->start;
+
+		/*
+		 * Don't need the path anymore, release to avoid deadlocks when
+		 * calling btrfs_is_data_extent_shared() because when joining a
+		 * transaction it can block waiting for the current one's commit
+		 * which in turn may be trying to lock the same leaf to flush
+		 * delayed items for example.
+		 */
+		btrfs_release_path(path);
+
+		ret = btrfs_is_data_extent_shared(BTRFS_I(inode), disk_bytenr,
+						  extent_gen, backref_ctx);
 		if (ret < 0) {
 			goto out;
-		} else if (ret) {
-			ret = 0;
-		} else {
+		} else if (ret > 0) {
 			btrfs_warn(fs_info,
 				   "swapfile must not be copy-on-write");
 			ret = -EINVAL;
@@ -9969,7 +10029,6 @@ static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file,
 
 		physical_block_start = (map->stripes[0].physical +
 					(logical_block_start - map->start));
-		len = min(len, map->chunk_len - (logical_block_start - map->start));
 		btrfs_free_chunk_map(map);
 		map = NULL;
 
@@ -10010,20 +10069,23 @@ static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file,
 				if (ret)
 					goto out;
 			}
-			bsi.start = start;
+			bsi.start = key.offset;
 			bsi.block_start = physical_block_start;
 			bsi.block_len = len;
 		}
 
-		start += len;
+		if (fatal_signal_pending(current)) {
+			ret = -EINTR;
+			goto out;
+		}
+
+		cond_resched();
 	}
 
 	if (bsi.block_len)
 		ret = btrfs_add_swap_extent(sis, &bsi);
 
 out:
-	if (!IS_ERR_OR_NULL(em))
-		free_extent_map(em);
 	if (!IS_ERR_OR_NULL(map))
 		btrfs_free_chunk_map(map);
 
@@ -10036,6 +10098,10 @@ static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file,
 
 	btrfs_exclop_finish(fs_info);
 
+out_unlock_mmap:
+	up_write(&BTRFS_I(inode)->i_mmap_lock);
+	btrfs_free_backref_share_ctx(backref_ctx);
+	btrfs_free_path(path);
 	if (ret)
 		return ret;
 
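
Note on the first group of hunks: the patch swaps the open-coded wait_queue_head_t/atomic_t pair in struct btrfs_encoded_read_private for a struct completion plus a refcount_t. The sketch below is a minimal, hypothetical illustration of that pattern (names demo_read_ctx, demo_endio and demo_submit_and_wait are made up; this is not the btrfs code), using the standard <linux/completion.h> and <linux/refcount.h> APIs: the submitter holds one reference, each in-flight bio holds another, and whoever drops the last reference signals the completion.

/* Hypothetical demo of the completion + refcount_t pattern; not btrfs code. */
#include <linux/completion.h>
#include <linux/compiler.h>
#include <linux/refcount.h>

struct demo_read_ctx {
	struct completion done;		/* signalled when the last reference is dropped */
	refcount_t pending_refs;	/* one for the submitter + one per in-flight bio */
	int status;			/* first error seen, 0 on success */
};

/* Per-bio completion handler: record an error, signal if we were the last user. */
static void demo_endio(struct demo_read_ctx *ctx, int err)
{
	if (err)
		WRITE_ONCE(ctx->status, err);
	if (refcount_dec_and_test(&ctx->pending_refs))
		complete(&ctx->done);
}

static int demo_submit_and_wait(struct demo_read_ctx *ctx, int nr_bios)
{
	int i;

	init_completion(&ctx->done);
	refcount_set(&ctx->pending_refs, 1);	/* submitter's own reference */
	ctx->status = 0;

	for (i = 0; i < nr_bios; i++) {
		refcount_inc(&ctx->pending_refs);
		/* ... submit bio i; its endio eventually calls demo_endio(ctx, err) ... */
	}

	/* Drop the submitter's reference; only wait if bios are still in flight. */
	if (!refcount_dec_and_test(&ctx->pending_refs))
		wait_for_completion(&ctx->done);

	return READ_ONCE(ctx->status);
}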