@@ -2480,6 +2480,7 @@ static int emit_fiemap_extent(struct fiemap_extent_info *fieinfo,
2480
2480
struct fiemap_cache * cache ,
2481
2481
u64 offset , u64 phys , u64 len , u32 flags )
2482
2482
{
2483
+ u64 cache_end ;
2483
2484
int ret = 0 ;
2484
2485
2485
2486
/* Set at the end of extent_fiemap(). */
@@ -2489,15 +2490,102 @@ static int emit_fiemap_extent(struct fiemap_extent_info *fieinfo,
2489
2490
goto assign ;
2490
2491
2491
2492
/*
2492
- * Sanity check, extent_fiemap() should have ensured that new
2493
- * fiemap extent won't overlap with cached one.
2494
- * Not recoverable.
2493
+ * When iterating the extents of the inode, at extent_fiemap(), we may
2494
+ * find an extent that starts at an offset behind the end offset of the
2495
+ * previous extent we processed. This happens if fiemap is called
2496
+ * without FIEMAP_FLAG_SYNC and there are ordered extents completing
2497
+ * while we call btrfs_next_leaf() (through fiemap_next_leaf_item()).
2495
2498
*
2496
- * NOTE: Physical address can overlap, due to compression
2499
+ * For example we are in leaf X processing its last item, which is the
2500
+ * file extent item for file range [512K, 1M[, and after
2501
+ * btrfs_next_leaf() releases the path, there's an ordered extent that
2502
+ * completes for the file range [768K, 2M[, and that results in trimming
2503
+ * the file extent item so that it now corresponds to the file range
2504
+ * [512K, 768K[ and a new file extent item is inserted for the file
2505
+ * range [768K, 2M[, which may end up as the last item of leaf X or as
2506
+ * the first item of the next leaf - in either case btrfs_next_leaf()
2507
+ * will leave us with a path pointing to the new extent item, for the
2508
+ * file range [768K, 2M[, since that's the first key that follows the
2509
+ * last one we processed. So in order not to report overlapping extents
2510
+ * to user space, we trim the length of the previously cached extent and
2511
+ * emit it.
2512
+ *
2513
+ * Upon calling btrfs_next_leaf() we may also find an extent with an
2514
+ * offset smaller than or equal to cache->offset, and this happens
2515
+ * when we had a hole or prealloc extent with several delalloc ranges in
2516
+ * it, but after btrfs_next_leaf() released the path, delalloc was
2517
+ * flushed and the resulting ordered extents were completed, so we can
2518
+ * now have found a file extent item for an offset that is smaller than
2519
+ * or equal to what we have in cache->offset. We deal with this as
2520
+ * described below.
2497
2521
*/
2498
- if (cache -> offset + cache -> len > offset ) {
2499
- WARN_ON (1 );
2500
- return - EINVAL ;
2522
+ cache_end = cache -> offset + cache -> len ;
2523
+ if (cache_end > offset ) {
2524
+ if (offset == cache -> offset ) {
2525
+ /*
2526
+ * We cached a delalloc range (found in the io tree) for
2527
+ * a hole or prealloc extent and we have now found a
2528
+ * file extent item for the same offset. What we have
2529
+ * now is more recent and up to date, so discard what
2530
+ * we had in the cache and use what we have just found.
2531
+ */
2532
+ goto assign ;
2533
+ } else if (offset > cache -> offset ) {
2534
+ /*
2535
+ * The extent range we previously found ends after the
2536
+ * offset of the file extent item we found and that
2537
+ * offset falls somewhere in the middle of that previous
2538
+ * extent range. So adjust the range we previously found
2539
+ * to end at the offset of the file extent item we have
2540
+ * just found, since this extent is more up to date.
2541
+ * Emit that adjusted range and cache the file extent
2542
+ * item we have just found. This corresponds to the case
2543
+ * where a previously found file extent item was split
2544
+ * due to an ordered extent completing.
2545
+ */
2546
+ cache -> len = offset - cache -> offset ;
2547
+ goto emit ;
2548
+ } else {
2549
+ const u64 range_end = offset + len ;
2550
+
2551
+ /*
2552
+ * The offset of the file extent item we have just found
2553
+ * is behind the cached offset. This means we were
2554
+ * processing a hole or prealloc extent for which we
2555
+ * have found delalloc ranges (in the io tree), so what
2556
+ * we have in the cache is the last delalloc range we
2557
+ * found while the file extent item we found can be
2558
+ * either for a whole delalloc range we previously
2559
+ * emitted or only a part of that range.
2560
+ *
2561
+ * We have two cases here:
2562
+ *
2563
+ * 1) The file extent item's range ends at or behind the
2564
+ * cached extent's end. In this case just ignore the
2565
+ * current file extent item because we don't want to
2566
+ * overlap with previous ranges that may have been
2567
+ * emitted already;
2568
+ *
2569
+ * 2) The file extent item starts behind the currently
2570
+ * cached extent but its end offset goes beyond the
2571
+ * end offset of the cached extent. We don't want to
2572
+ * overlap with a previous range that may have been
2573
+ * emitted already, so we emit the currently cached
2574
+ * extent and then partially store the current file
2575
+ * extent item's range in the cache, for the subrange
2576
+ * going from the cached extent's end to the end of the
2577
+ * file extent item.
2578
+ */
2579
+ if (range_end <= cache_end )
2580
+ return 0 ;
2581
+
2582
+ if (!(flags & (FIEMAP_EXTENT_ENCODED | FIEMAP_EXTENT_DELALLOC )))
2583
+ phys += cache_end - offset ;
2584
+
2585
+ offset = cache_end ;
2586
+ len = range_end - cache_end ;
2587
+ goto emit ;
2588
+ }
2501
2589
}
2502
2590
2503
2591
/*
@@ -2517,6 +2605,7 @@ static int emit_fiemap_extent(struct fiemap_extent_info *fieinfo,
2517
2605
return 0 ;
2518
2606
}
2519
2607
2608
+ emit :
2520
2609
/* Not mergeable, need to submit cached one */
2521
2610
ret = fiemap_fill_next_extent (fieinfo , cache -> offset , cache -> phys ,
2522
2611
cache -> len , cache -> flags );
@@ -2907,17 +2996,15 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
2907
2996
range_end = round_up (start + len , sectorsize );
2908
2997
prev_extent_end = range_start ;
2909
2998
2910
- btrfs_inode_lock (inode , BTRFS_ILOCK_SHARED );
2911
-
2912
2999
ret = fiemap_find_last_extent_offset (inode , path , & last_extent_end );
2913
3000
if (ret < 0 )
2914
- goto out_unlock ;
3001
+ goto out ;
2915
3002
btrfs_release_path (path );
2916
3003
2917
3004
path -> reada = READA_FORWARD ;
2918
3005
ret = fiemap_search_slot (inode , path , range_start );
2919
3006
if (ret < 0 ) {
2920
- goto out_unlock ;
3007
+ goto out ;
2921
3008
} else if (ret > 0 ) {
2922
3009
/*
2923
3010
* No file extent item found, but we may have delalloc between
@@ -2964,7 +3051,7 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
2964
3051
backref_ctx , 0 , 0 , 0 ,
2965
3052
prev_extent_end , hole_end );
2966
3053
if (ret < 0 ) {
2967
- goto out_unlock ;
3054
+ goto out ;
2968
3055
} else if (ret > 0 ) {
2969
3056
/* fiemap_fill_next_extent() told us to stop. */
2970
3057
stopped = true;
@@ -3020,7 +3107,7 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
3020
3107
extent_gen ,
3021
3108
backref_ctx );
3022
3109
if (ret < 0 )
3023
- goto out_unlock ;
3110
+ goto out ;
3024
3111
else if (ret > 0 )
3025
3112
flags |= FIEMAP_EXTENT_SHARED ;
3026
3113
}
@@ -3031,7 +3118,7 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
3031
3118
}
3032
3119
3033
3120
if (ret < 0 ) {
3034
- goto out_unlock ;
3121
+ goto out ;
3035
3122
} else if (ret > 0 ) {
3036
3123
/* fiemap_fill_next_extent() told us to stop. */
3037
3124
stopped = true;
@@ -3042,12 +3129,12 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
3042
3129
next_item :
3043
3130
if (fatal_signal_pending (current )) {
3044
3131
ret = - EINTR ;
3045
- goto out_unlock ;
3132
+ goto out ;
3046
3133
}
3047
3134
3048
3135
ret = fiemap_next_leaf_item (inode , path );
3049
3136
if (ret < 0 ) {
3050
- goto out_unlock ;
3137
+ goto out ;
3051
3138
} else if (ret > 0 ) {
3052
3139
/* No more file extent items for this inode. */
3053
3140
break ;
@@ -3071,7 +3158,7 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
3071
3158
& delalloc_cached_state , backref_ctx ,
3072
3159
0 , 0 , 0 , prev_extent_end , range_end - 1 );
3073
3160
if (ret < 0 )
3074
- goto out_unlock ;
3161
+ goto out ;
3075
3162
prev_extent_end = range_end ;
3076
3163
}
3077
3164
@@ -3109,9 +3196,6 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
3109
3196
}
3110
3197
3111
3198
ret = emit_last_fiemap_cache (fieinfo , & cache );
3112
-
3113
- out_unlock :
3114
- btrfs_inode_unlock (inode , BTRFS_ILOCK_SHARED );
3115
3199
out :
3116
3200
free_extent_state (delalloc_cached_state );
3117
3201
btrfs_free_backref_share_ctx (backref_ctx );
0 commit comments