@@ -1883,6 +1883,9 @@ static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
1883
1883
return - ENOTSUP ;
1884
1884
}
1885
1885
1886
+ /* Invalidate the cached block-status data range if this write overlaps */
1887
+ bdrv_bsc_invalidate_range (bs , offset , bytes );
1888
+
1886
1889
assert (alignment % bs -> bl .request_alignment == 0 );
1887
1890
head = offset % alignment ;
1888
1891
tail = (offset + bytes ) % alignment ;
@@ -2447,9 +2450,65 @@ static int coroutine_fn bdrv_co_block_status(BlockDriverState *bs,
2447
2450
aligned_bytes = ROUND_UP (offset + bytes , align ) - aligned_offset ;
2448
2451
2449
2452
if (bs -> drv -> bdrv_co_block_status ) {
2450
- ret = bs -> drv -> bdrv_co_block_status (bs , want_zero , aligned_offset ,
2451
- aligned_bytes , pnum , & local_map ,
2452
- & local_file );
2453
+ /*
2454
+ * Use the block-status cache only for protocol nodes: Format
2455
+ * drivers are generally quick to inquire the status, but protocol
2456
+ * drivers often need to get information from outside of qemu, so
2457
+ * we do not have control over the actual implementation. There
2458
+ * have been cases where inquiring the status took an unreasonably
2459
+ * long time, and we can do nothing in qemu to fix it.
2460
+ * This is especially problematic for images with large data areas,
2461
+ * because finding the few holes in them and giving them special
2462
+ * treatment does not gain much performance. Therefore, we try to
2463
+ * cache the last-identified data region.
2464
+ *
2465
+ * Second, limiting ourselves to protocol nodes allows us to assume
2466
+ * the block status for data regions to be DATA | OFFSET_VALID, and
2467
+ * that the host offset is the same as the guest offset.
2468
+ *
2469
+ * Note that it is possible that external writers zero parts of
2470
+ * the cached regions without the cache being invalidated, and so
2471
+ * we may report zeroes as data. This is not catastrophic,
2472
+ * however, because reporting zeroes as data is fine.
2473
+ */
2474
+ if (QLIST_EMPTY (& bs -> children ) &&
2475
+ bdrv_bsc_is_data (bs , aligned_offset , pnum ))
2476
+ {
2477
+ ret = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID ;
2478
+ local_file = bs ;
2479
+ local_map = aligned_offset ;
2480
+ } else {
2481
+ ret = bs -> drv -> bdrv_co_block_status (bs , want_zero , aligned_offset ,
2482
+ aligned_bytes , pnum , & local_map ,
2483
+ & local_file );
2484
+
2485
+ /*
2486
+ * Note that checking QLIST_EMPTY(&bs->children) is also done when
2487
+ * the cache is queried above. Technically, we do not need to check
2488
+ * it here; the worst that can happen is that we fill the cache for
2489
+ * non-protocol nodes, and then it is never used. However, filling
2490
+ * the cache requires an RCU update, so double check here to avoid
2491
+ * such an update if possible.
2492
+ */
2493
+ if (ret == (BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID ) &&
2494
+ QLIST_EMPTY (& bs -> children ))
2495
+ {
2496
+ /*
2497
+ * When a protocol driver reports BLOCK_OFFSET_VALID, the
2498
+ * returned local_map value must be the same as the offset we
2499
+ * have passed (aligned_offset), and local_file must be the node
2500
+ * itself.
2501
+ * Assert this, because we follow this rule when reading from
2502
+ * the cache (see the `local_file = bs` and
2503
+ * `local_map = aligned_offset` assignments above), and the
2504
+ * result the cache delivers must be the same as the driver
2505
+ * would deliver.
2506
+ */
2507
+ assert (local_file == bs );
2508
+ assert (local_map == aligned_offset );
2509
+ bdrv_bsc_fill (bs , aligned_offset , * pnum );
2510
+ }
2511
+ }
2453
2512
} else {
2454
2513
/* Default code for filters */
2455
2514
@@ -3002,6 +3061,9 @@ int coroutine_fn bdrv_co_pdiscard(BdrvChild *child, int64_t offset,
3002
3061
return 0 ;
3003
3062
}
3004
3063
3064
+ /* Invalidate the cached block-status data range if this discard overlaps */
3065
+ bdrv_bsc_invalidate_range (bs , offset , bytes );
3066
+
3005
3067
/* Discard is advisory, but some devices track and coalesce
3006
3068
* unaligned requests, so we must pass everything down rather than
3007
3069
* round here. Still, most devices will just silently ignore
0 commit comments