@@ -653,6 +653,12 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
653
653
u64 extent_end ;
654
654
u64 start = key -> offset ;
655
655
u64 nbytes = 0 ;
656
+ u64 csum_start ;
657
+ u64 csum_end ;
658
+ LIST_HEAD (ordered_sums );
659
+ u64 offset ;
660
+ unsigned long dest_offset ;
661
+ struct btrfs_key ins ;
656
662
struct btrfs_file_extent_item * item ;
657
663
struct btrfs_inode * inode = NULL ;
658
664
unsigned long size ;
@@ -730,187 +736,180 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
730
736
goto out ;
731
737
}
732
738
733
- if (found_type == BTRFS_FILE_EXTENT_REG ||
734
- found_type == BTRFS_FILE_EXTENT_PREALLOC ) {
735
- u64 csum_start ;
736
- u64 csum_end ;
737
- LIST_HEAD (ordered_sums );
738
- u64 offset ;
739
- unsigned long dest_offset ;
740
- struct btrfs_key ins ;
739
+ if (found_type == BTRFS_FILE_EXTENT_INLINE ) {
740
+ /* inline extents are easy, we just overwrite them */
741
+ ret = overwrite_item (trans , root , path , eb , slot , key );
742
+ if (ret )
743
+ goto out ;
744
+ goto update_inode ;
745
+ }
741
746
742
- if (btrfs_file_extent_disk_bytenr (eb , item ) == 0 &&
743
- btrfs_fs_incompat (fs_info , NO_HOLES ))
744
- goto update_inode ;
747
+ /*
748
+ * If not an inline extent, it can only be a regular or prealloc one.
749
+ * We have checked that above and returned -EUCLEAN if not.
750
+ */
745
751
746
- ret = btrfs_insert_empty_item (trans , root , path , key ,
747
- sizeof (* item ));
748
- if (ret ) {
749
- btrfs_abort_transaction (trans , ret );
750
- goto out ;
751
- }
752
- dest_offset = btrfs_item_ptr_offset (path -> nodes [0 ],
753
- path -> slots [0 ]);
754
- copy_extent_buffer (path -> nodes [0 ], eb , dest_offset ,
755
- (unsigned long )item , sizeof (* item ));
752
+ /* A hole and NO_HOLES feature enabled, nothing else to do. */
753
+ if (btrfs_file_extent_disk_bytenr (eb , item ) == 0 &&
754
+ btrfs_fs_incompat (fs_info , NO_HOLES ))
755
+ goto update_inode ;
756
756
757
- /*
758
- * We have an explicit hole and NO_HOLES is not enabled. We have
759
- * added the hole file extent item to the subvolume tree, so we
760
- * don't have anything else to do other than update the file
761
- * extent item range and update the inode item.
762
- */
763
- if (btrfs_file_extent_disk_bytenr (eb , item ) == 0 ) {
764
- btrfs_release_path (path );
765
- goto update_inode ;
766
- }
757
+ ret = btrfs_insert_empty_item (trans , root , path , key , sizeof (* item ));
758
+ if (ret ) {
759
+ btrfs_abort_transaction (trans , ret );
760
+ goto out ;
761
+ }
762
+ dest_offset = btrfs_item_ptr_offset (path -> nodes [0 ], path -> slots [0 ]);
763
+ copy_extent_buffer (path -> nodes [0 ], eb , dest_offset , (unsigned long )item ,
764
+ sizeof (* item ));
767
765
768
- ins .objectid = btrfs_file_extent_disk_bytenr (eb , item );
769
- ins .type = BTRFS_EXTENT_ITEM_KEY ;
770
- ins .offset = btrfs_file_extent_disk_num_bytes (eb , item );
771
- offset = key -> offset - btrfs_file_extent_offset (eb , item );
766
+ /*
767
+ * We have an explicit hole and NO_HOLES is not enabled. We have added
768
+ * the hole file extent item to the subvolume tree, so we don't have
769
+ * anything else to do other than update the file extent item range and
770
+ * update the inode item.
771
+ */
772
+ if (btrfs_file_extent_disk_bytenr (eb , item ) == 0 ) {
773
+ btrfs_release_path (path );
774
+ goto update_inode ;
775
+ }
772
776
773
- /*
774
- * Manually record dirty extent, as here we did a shallow
775
- * file extent item copy and skip normal backref update,
776
- * but modifying extent tree all by ourselves.
777
- * So need to manually record dirty extent for qgroup,
778
- * as the owner of the file extent changed from log tree
779
- * (doesn't affect qgroup) to fs/file tree(affects qgroup)
780
- */
781
- ret = btrfs_qgroup_trace_extent (trans ,
782
- btrfs_file_extent_disk_bytenr (eb , item ),
783
- btrfs_file_extent_disk_num_bytes (eb , item ));
784
- if (ret < 0 ) {
777
+ ins .objectid = btrfs_file_extent_disk_bytenr (eb , item );
778
+ ins .type = BTRFS_EXTENT_ITEM_KEY ;
779
+ ins .offset = btrfs_file_extent_disk_num_bytes (eb , item );
780
+ offset = key -> offset - btrfs_file_extent_offset (eb , item );
781
+
782
+ /*
783
+ * Manually record dirty extent, as here we did a shallow file extent
784
+ * item copy and skip normal backref update, but modifying extent tree
785
+ * all by ourselves. So need to manually record dirty extent for qgroup,
786
+ * as the owner of the file extent changed from log tree (doesn't affect
787
+ * qgroup) to fs/file tree (affects qgroup).
788
+ */
789
+ ret = btrfs_qgroup_trace_extent (trans ,
790
+ btrfs_file_extent_disk_bytenr (eb , item ),
791
+ btrfs_file_extent_disk_num_bytes (eb , item ));
792
+ if (ret < 0 ) {
793
+ btrfs_abort_transaction (trans , ret );
794
+ goto out ;
795
+ }
796
+
797
+ /*
798
+ * Is this extent already allocated in the extent tree?
799
+ * If so, just add a reference.
800
+ */
801
+ ret = btrfs_lookup_data_extent (fs_info , ins .objectid , ins .offset );
802
+ if (ret < 0 ) {
803
+ btrfs_abort_transaction (trans , ret );
804
+ goto out ;
805
+ } else if (ret == 0 ) {
806
+ struct btrfs_ref ref = {
807
+ .action = BTRFS_ADD_DELAYED_REF ,
808
+ .bytenr = ins .objectid ,
809
+ .num_bytes = ins .offset ,
810
+ .owning_root = btrfs_root_id (root ),
811
+ .ref_root = btrfs_root_id (root ),
812
+ };
813
+
814
+ btrfs_init_data_ref (& ref , key -> objectid , offset , 0 , false);
815
+ ret = btrfs_inc_extent_ref (trans , & ref );
816
+ if (ret ) {
785
817
btrfs_abort_transaction (trans , ret );
786
818
goto out ;
787
819
}
788
-
789
- /*
790
- * Is this extent already allocated in the extent tree?
791
- * If so, just add a reference.
792
- */
793
- ret = btrfs_lookup_data_extent (fs_info , ins .objectid , ins .offset );
794
- if (ret < 0 ) {
820
+ } else {
821
+ /* Insert the extent pointer in the extent tree. */
822
+ ret = btrfs_alloc_logged_file_extent (trans , btrfs_root_id (root ),
823
+ key -> objectid , offset , & ins );
824
+ if (ret ) {
795
825
btrfs_abort_transaction (trans , ret );
796
826
goto out ;
797
- } else if (ret == 0 ) {
798
- struct btrfs_ref ref = {
799
- .action = BTRFS_ADD_DELAYED_REF ,
800
- .bytenr = ins .objectid ,
801
- .num_bytes = ins .offset ,
802
- .owning_root = btrfs_root_id (root ),
803
- .ref_root = btrfs_root_id (root ),
804
- };
805
-
806
- btrfs_init_data_ref (& ref , key -> objectid , offset , 0 , false);
807
- ret = btrfs_inc_extent_ref (trans , & ref );
808
- if (ret ) {
809
- btrfs_abort_transaction (trans , ret );
810
- goto out ;
811
- }
812
- } else {
813
- /* Insert the extent pointer in the extent tree. */
814
- ret = btrfs_alloc_logged_file_extent (trans , btrfs_root_id (root ),
815
- key -> objectid , offset , & ins );
816
- if (ret ) {
817
- btrfs_abort_transaction (trans , ret );
818
- goto out ;
819
- }
820
827
}
828
+ }
821
829
822
- btrfs_release_path (path );
830
+ btrfs_release_path (path );
823
831
824
- if (btrfs_file_extent_compression (eb , item )) {
825
- csum_start = ins .objectid ;
826
- csum_end = csum_start + ins .offset ;
827
- } else {
828
- csum_start = ins .objectid + btrfs_file_extent_offset (eb , item );
829
- csum_end = csum_start + btrfs_file_extent_num_bytes (eb , item );
830
- }
832
+ if (btrfs_file_extent_compression (eb , item )) {
833
+ csum_start = ins .objectid ;
834
+ csum_end = csum_start + ins .offset ;
835
+ } else {
836
+ csum_start = ins .objectid + btrfs_file_extent_offset (eb , item );
837
+ csum_end = csum_start + btrfs_file_extent_num_bytes (eb , item );
838
+ }
831
839
832
- ret = btrfs_lookup_csums_list (root -> log_root , csum_start , csum_end - 1 ,
833
- & ordered_sums , false);
834
- if (ret < 0 ) {
835
- btrfs_abort_transaction (trans , ret );
836
- goto out ;
837
- }
838
- ret = 0 ;
839
- /*
840
- * Now delete all existing cums in the csum root that cover our
841
- * range. We do this because we can have an extent that is
842
- * completely referenced by one file extent item and partially
843
- * referenced by another file extent item (like after using the
844
- * clone or extent_same ioctls). In this case if we end up doing
845
- * the replay of the one that partially references the extent
846
- * first, and we do not do the csum deletion below, we can get 2
847
- * csum items in the csum tree that overlap each other. For
848
- * example, imagine our log has the two following file extent items:
849
- *
850
- * key (257 EXTENT_DATA 409600)
851
- * extent data disk byte 12845056 nr 102400
852
- * extent data offset 20480 nr 20480 ram 102400
853
- *
854
- * key (257 EXTENT_DATA 819200)
855
- * extent data disk byte 12845056 nr 102400
856
- * extent data offset 0 nr 102400 ram 102400
857
- *
858
- * Where the second one fully references the 100K extent that
859
- * starts at disk byte 12845056, and the log tree has a single
860
- * csum item that covers the entire range of the extent:
861
- *
862
- * key (EXTENT_CSUM EXTENT_CSUM 12845056) itemsize 100
863
- *
864
- * After the first file extent item is replayed, the csum tree
865
- * gets the following csum item:
866
- *
867
- * key (EXTENT_CSUM EXTENT_CSUM 12865536) itemsize 20
868
- *
869
- * Which covers the 20K sub-range starting at offset 20K of our
870
- * extent. Now when we replay the second file extent item, if we
871
- * do not delete existing csum items that cover any of its
872
- * blocks, we end up getting two csum items in our csum tree
873
- * that overlap each other:
874
- *
875
- * key (EXTENT_CSUM EXTENT_CSUM 12845056) itemsize 100
876
- * key (EXTENT_CSUM EXTENT_CSUM 12865536) itemsize 20
877
- *
878
- * Which is a problem, because after this anyone trying to
879
- * lookup up for the checksum of any block of our extent
880
- * starting at an offset of 40K or higher, will end up looking
881
- * at the second csum item only, which does not contain the
882
- * checksum for any block starting at offset 40K or higher of
883
- * our extent.
884
- */
885
- while (!list_empty (& ordered_sums )) {
886
- struct btrfs_ordered_sum * sums ;
887
- struct btrfs_root * csum_root ;
840
+ ret = btrfs_lookup_csums_list (root -> log_root , csum_start , csum_end - 1 ,
841
+ & ordered_sums , false);
842
+ if (ret < 0 ) {
843
+ btrfs_abort_transaction (trans , ret );
844
+ goto out ;
845
+ }
846
+ ret = 0 ;
847
+ /*
848
+ * Now delete all existing cums in the csum root that cover our range.
849
+ * We do this because we can have an extent that is completely
850
+ * referenced by one file extent item and partially referenced by
851
+ * another file extent item (like after using the clone or extent_same
852
+ * ioctls). In this case if we end up doing the replay of the one that
853
+ * partially references the extent first, and we do not do the csum
854
+ * deletion below, we can get 2 csum items in the csum tree that overlap
855
+ * each other. For example, imagine our log has the two following file
856
+ * extent items:
857
+ *
858
+ * key (257 EXTENT_DATA 409600)
859
+ * extent data disk byte 12845056 nr 102400
860
+ * extent data offset 20480 nr 20480 ram 102400
861
+ *
862
+ * key (257 EXTENT_DATA 819200)
863
+ * extent data disk byte 12845056 nr 102400
864
+ * extent data offset 0 nr 102400 ram 102400
865
+ *
866
+ * Where the second one fully references the 100K extent that starts at
867
+ * disk byte 12845056, and the log tree has a single csum item that
868
+ * covers the entire range of the extent:
869
+ *
870
+ * key (EXTENT_CSUM EXTENT_CSUM 12845056) itemsize 100
871
+ *
872
+ * After the first file extent item is replayed, the csum tree gets the
873
+ * following csum item:
874
+ *
875
+ * key (EXTENT_CSUM EXTENT_CSUM 12865536) itemsize 20
876
+ *
877
+ * Which covers the 20K sub-range starting at offset 20K of our extent.
878
+ * Now when we replay the second file extent item, if we do not delete
879
+ * existing csum items that cover any of its blocks, we end up getting
880
+ * two csum items in our csum tree that overlap each other:
881
+ *
882
+ * key (EXTENT_CSUM EXTENT_CSUM 12845056) itemsize 100
883
+ * key (EXTENT_CSUM EXTENT_CSUM 12865536) itemsize 20
884
+ *
885
+ * Which is a problem, because after this anyone trying to lookup for
886
+ * the checksum of any block of our extent starting at an offset of 40K
887
+ * or higher, will end up looking at the second csum item only, which
888
+ * does not contain the checksum for any block starting at offset 40K or
889
+ * higher of our extent.
890
+ */
891
+ while (!list_empty (& ordered_sums )) {
892
+ struct btrfs_ordered_sum * sums ;
893
+ struct btrfs_root * csum_root ;
888
894
889
- sums = list_first_entry (& ordered_sums ,
890
- struct btrfs_ordered_sum , list );
891
- csum_root = btrfs_csum_root (fs_info , sums -> logical );
892
- if (!ret ) {
893
- ret = btrfs_del_csums (trans , csum_root , sums -> logical ,
894
- sums -> len );
895
- if (ret )
896
- btrfs_abort_transaction (trans , ret );
897
- }
898
- if (!ret ) {
899
- ret = btrfs_csum_file_blocks (trans , csum_root , sums );
900
- if (ret )
901
- btrfs_abort_transaction (trans , ret );
902
- }
903
- list_del (& sums -> list );
904
- kfree (sums );
895
+ sums = list_first_entry (& ordered_sums , struct btrfs_ordered_sum , list );
896
+ csum_root = btrfs_csum_root (fs_info , sums -> logical );
897
+ if (!ret ) {
898
+ ret = btrfs_del_csums (trans , csum_root , sums -> logical ,
899
+ sums -> len );
900
+ if (ret )
901
+ btrfs_abort_transaction (trans , ret );
905
902
}
906
- if (ret )
907
- goto out ;
908
- } else if (found_type == BTRFS_FILE_EXTENT_INLINE ) {
909
- /* inline extents are easy, we just overwrite them */
910
- ret = overwrite_item ( trans , root , path , eb , slot , key );
911
- if ( ret )
912
- goto out ;
903
+ if (! ret ) {
904
+ ret = btrfs_csum_file_blocks ( trans , csum_root , sums ) ;
905
+ if (ret )
906
+ btrfs_abort_transaction ( trans , ret );
907
+ }
908
+ list_del ( & sums -> list );
909
+ kfree ( sums ) ;
913
910
}
911
+ if (ret )
912
+ goto out ;
914
913
915
914
update_inode :
916
915
ret = btrfs_inode_set_file_extent_range (inode , start , extent_end - start );
0 commit comments