Skip to content

Commit 81e7706

Browse files
damien-lemoal
authored and axboe committed
dm: handle REQ_OP_ZONE_RESET_ALL
This commit implements processing of the REQ_OP_ZONE_RESET_ALL operation for zoned mapped devices. Given that this operation always has a BIO sector of 0 and a 0 size, processing through the regular BIO __split_and_process_bio() function does not work because this function would always select the first target. Instead, handling of this operation is implemented using the function __send_zone_reset_all(). Similarly to the __send_empty_flush() function, the new __send_zone_reset_all() function manually goes through all targets of a mapped device table doing the following: 1) If the target can natively support REQ_OP_ZONE_RESET_ALL, __send_duplicate_bios() is used to forward the reset all operation to the target. This case is handled with the __send_zone_reset_all_native() function. 2) For other targets, the function __send_zone_reset_all_emulated() is executed to emulate the execution of REQ_OP_ZONE_RESET_ALL using regular REQ_OP_ZONE_RESET operations. Targets that can natively support REQ_OP_ZONE_RESET_ALL are identified using the new target field zone_reset_all_supported. This boolean is set to true in for targets that have reliable zone limits, that is, targets that map all sequential write required zones of their zoned device(s). Setting this field is handled in dm_set_zones_restrictions() and device_get_zone_resource_limits(). For targets with unreliable zone limits, REQ_OP_ZONE_RESET_ALL must be emulated (case 2 above). This is implemented with __send_zone_reset_all_emulated() and is similar to the block layer function blkdev_zone_reset_all_emulated(): first a report zones is done for the zones of the target to identify zones that need reset, that is, any sequential write required zone that is not already empty. This is done using a bitmap and the function dm_zone_get_reset_bitmap() which sets to 1 the bit corresponding to a zone that needs reset. 
Next, this zone bitmap is inspected and a clone BIO, modified to use the REQ_OP_ZONE_RESET operation, is issued for any zone with its bit set in the zone bitmap. This implementation is more efficient than what the block layer does with blkdev_zone_reset_all_emulated(), which is always used for DM zoned devices currently: as we can natively use REQ_OP_ZONE_RESET_ALL on targets mapping all sequential write required zones, resetting all zones of a zoned mapped device can be much faster compared to always emulating this operation using regular per-zone reset. In the worst case, this implementation is as efficient as the block layer emulation. This reduction in the time it takes to reset all zones of a zoned mapped device depends directly on the mapped device targets mapping (reliable zone limits or not). Signed-off-by: Damien Le Moal <[email protected]> Reviewed-by: Christoph Hellwig <[email protected]> Reviewed-by: Johannes Thumshirn <[email protected]> Reviewed-by: Martin K. Petersen <[email protected]> Link: https://lore.kernel.org/r/[email protected] Signed-off-by: Jens Axboe <[email protected]>
1 parent ae7e965 commit 81e7706

File tree

4 files changed

+190
-5
lines changed

4 files changed

+190
-5
lines changed

drivers/md/dm-zone.c

Lines changed: 48 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -292,10 +292,12 @@ static int device_get_zone_resource_limits(struct dm_target *ti,
292292

293293
/*
294294
* If the target does not map all sequential zones, the limits
295-
* will not be reliable.
295+
* will not be reliable and we cannot use REQ_OP_ZONE_RESET_ALL.
296296
*/
297-
if (zc.target_nr_seq_zones < zc.total_nr_seq_zones)
297+
if (zc.target_nr_seq_zones < zc.total_nr_seq_zones) {
298298
zlim->reliable_limits = false;
299+
ti->zone_reset_all_supported = false;
300+
}
299301

300302
/*
301303
* If the target maps less sequential zones than the limit values, then
@@ -353,6 +355,14 @@ int dm_set_zones_restrictions(struct dm_table *t, struct request_queue *q,
353355
for (unsigned int i = 0; i < t->num_targets; i++) {
354356
struct dm_target *ti = dm_table_get_target(t, i);
355357

358+
/*
359+
* Assume that the target can accept REQ_OP_ZONE_RESET_ALL.
360+
* device_get_zone_resource_limits() may adjust this if one of
361+
* the device used by the target does not have all its
362+
* sequential write required zones mapped.
363+
*/
364+
ti->zone_reset_all_supported = true;
365+
356366
if (!ti->type->iterate_devices ||
357367
ti->type->iterate_devices(ti,
358368
device_get_zone_resource_limits, &zlim)) {
@@ -420,3 +430,39 @@ void dm_zone_endio(struct dm_io *io, struct bio *clone)
420430

421431
return;
422432
}
433+
434+
static int dm_zone_need_reset_cb(struct blk_zone *zone, unsigned int idx,
435+
void *data)
436+
{
437+
/*
438+
* For an all-zones reset, ignore conventional, empty, read-only
439+
* and offline zones.
440+
*/
441+
switch (zone->cond) {
442+
case BLK_ZONE_COND_NOT_WP:
443+
case BLK_ZONE_COND_EMPTY:
444+
case BLK_ZONE_COND_READONLY:
445+
case BLK_ZONE_COND_OFFLINE:
446+
return 0;
447+
default:
448+
set_bit(idx, (unsigned long *)data);
449+
return 0;
450+
}
451+
}
452+
453+
int dm_zone_get_reset_bitmap(struct mapped_device *md, struct dm_table *t,
454+
sector_t sector, unsigned int nr_zones,
455+
unsigned long *need_reset)
456+
{
457+
int ret;
458+
459+
ret = dm_blk_do_report_zones(md, t, sector, nr_zones,
460+
dm_zone_need_reset_cb, need_reset);
461+
if (ret != nr_zones) {
462+
DMERR("Get %s zone reset bitmap failed\n",
463+
md->disk->disk_name);
464+
return -EIO;
465+
}
466+
467+
return 0;
468+
}

drivers/md/dm.c

Lines changed: 132 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1606,6 +1606,7 @@ static bool is_abnormal_io(struct bio *bio)
16061606
case REQ_OP_DISCARD:
16071607
case REQ_OP_SECURE_ERASE:
16081608
case REQ_OP_WRITE_ZEROES:
1609+
case REQ_OP_ZONE_RESET_ALL:
16091610
return true;
16101611
default:
16111612
return false;
@@ -1774,6 +1775,119 @@ static inline bool dm_zone_plug_bio(struct mapped_device *md, struct bio *bio)
17741775
{
17751776
return dm_emulate_zone_append(md) && blk_zone_plug_bio(bio, 0);
17761777
}
1778+
1779+
static blk_status_t __send_zone_reset_all_emulated(struct clone_info *ci,
1780+
struct dm_target *ti)
1781+
{
1782+
struct bio_list blist = BIO_EMPTY_LIST;
1783+
struct mapped_device *md = ci->io->md;
1784+
unsigned int zone_sectors = md->disk->queue->limits.chunk_sectors;
1785+
unsigned long *need_reset;
1786+
unsigned int i, nr_zones, nr_reset;
1787+
unsigned int num_bios = 0;
1788+
blk_status_t sts = BLK_STS_OK;
1789+
sector_t sector = ti->begin;
1790+
struct bio *clone;
1791+
int ret;
1792+
1793+
nr_zones = ti->len >> ilog2(zone_sectors);
1794+
need_reset = bitmap_zalloc(nr_zones, GFP_NOIO);
1795+
if (!need_reset)
1796+
return BLK_STS_RESOURCE;
1797+
1798+
ret = dm_zone_get_reset_bitmap(md, ci->map, ti->begin,
1799+
nr_zones, need_reset);
1800+
if (ret) {
1801+
sts = BLK_STS_IOERR;
1802+
goto free_bitmap;
1803+
}
1804+
1805+
/* If we have no zone to reset, we are done. */
1806+
nr_reset = bitmap_weight(need_reset, nr_zones);
1807+
if (!nr_reset)
1808+
goto free_bitmap;
1809+
1810+
atomic_add(nr_zones, &ci->io->io_count);
1811+
1812+
for (i = 0; i < nr_zones; i++) {
1813+
1814+
if (!test_bit(i, need_reset)) {
1815+
sector += zone_sectors;
1816+
continue;
1817+
}
1818+
1819+
if (bio_list_empty(&blist)) {
1820+
/* This may take a while, so be nice to others */
1821+
if (num_bios)
1822+
cond_resched();
1823+
1824+
/*
1825+
* We may need to reset thousands of zones, so let's
1826+
* not go crazy with the clone allocation.
1827+
*/
1828+
alloc_multiple_bios(&blist, ci, ti, min(nr_reset, 32),
1829+
NULL, GFP_NOIO);
1830+
}
1831+
1832+
/* Get a clone and change it to a regular reset operation. */
1833+
clone = bio_list_pop(&blist);
1834+
clone->bi_opf &= ~REQ_OP_MASK;
1835+
clone->bi_opf |= REQ_OP_ZONE_RESET | REQ_SYNC;
1836+
clone->bi_iter.bi_sector = sector;
1837+
clone->bi_iter.bi_size = 0;
1838+
__map_bio(clone);
1839+
1840+
sector += zone_sectors;
1841+
num_bios++;
1842+
nr_reset--;
1843+
}
1844+
1845+
WARN_ON_ONCE(!bio_list_empty(&blist));
1846+
atomic_sub(nr_zones - num_bios, &ci->io->io_count);
1847+
ci->sector_count = 0;
1848+
1849+
free_bitmap:
1850+
bitmap_free(need_reset);
1851+
1852+
return sts;
1853+
}
1854+
1855+
static void __send_zone_reset_all_native(struct clone_info *ci,
1856+
struct dm_target *ti)
1857+
{
1858+
unsigned int bios;
1859+
1860+
atomic_add(1, &ci->io->io_count);
1861+
bios = __send_duplicate_bios(ci, ti, 1, NULL, GFP_NOIO);
1862+
atomic_sub(1 - bios, &ci->io->io_count);
1863+
1864+
ci->sector_count = 0;
1865+
}
1866+
1867+
static blk_status_t __send_zone_reset_all(struct clone_info *ci)
1868+
{
1869+
struct dm_table *t = ci->map;
1870+
blk_status_t sts = BLK_STS_OK;
1871+
1872+
for (unsigned int i = 0; i < t->num_targets; i++) {
1873+
struct dm_target *ti = dm_table_get_target(t, i);
1874+
1875+
if (ti->zone_reset_all_supported) {
1876+
__send_zone_reset_all_native(ci, ti);
1877+
continue;
1878+
}
1879+
1880+
sts = __send_zone_reset_all_emulated(ci, ti);
1881+
if (sts != BLK_STS_OK)
1882+
break;
1883+
}
1884+
1885+
/* Release the reference that alloc_io() took for submission. */
1886+
atomic_sub(1, &ci->io->io_count);
1887+
1888+
return sts;
1889+
}
1890+
17771891
#else
17781892
static inline bool dm_zone_bio_needs_split(struct mapped_device *md,
17791893
struct bio *bio)
@@ -1784,6 +1898,10 @@ static inline bool dm_zone_plug_bio(struct mapped_device *md, struct bio *bio)
17841898
{
17851899
return false;
17861900
}
1901+
static blk_status_t __send_zone_reset_all(struct clone_info *ci)
1902+
{
1903+
return BLK_STS_NOTSUPP;
1904+
}
17871905
#endif
17881906

17891907
/*
@@ -1797,9 +1915,14 @@ static void dm_split_and_process_bio(struct mapped_device *md,
17971915
blk_status_t error = BLK_STS_OK;
17981916
bool is_abnormal, need_split;
17991917

1800-
need_split = is_abnormal = is_abnormal_io(bio);
1801-
if (static_branch_unlikely(&zoned_enabled))
1802-
need_split = is_abnormal || dm_zone_bio_needs_split(md, bio);
1918+
is_abnormal = is_abnormal_io(bio);
1919+
if (static_branch_unlikely(&zoned_enabled)) {
1920+
/* Special case REQ_OP_ZONE_RESET_ALL as it cannot be split. */
1921+
need_split = (bio_op(bio) != REQ_OP_ZONE_RESET_ALL) &&
1922+
(is_abnormal || dm_zone_bio_needs_split(md, bio));
1923+
} else {
1924+
need_split = is_abnormal;
1925+
}
18031926

18041927
if (unlikely(need_split)) {
18051928
/*
@@ -1840,6 +1963,12 @@ static void dm_split_and_process_bio(struct mapped_device *md,
18401963
goto out;
18411964
}
18421965

1966+
if (static_branch_unlikely(&zoned_enabled) &&
1967+
(bio_op(bio) == REQ_OP_ZONE_RESET_ALL)) {
1968+
error = __send_zone_reset_all(&ci);
1969+
goto out;
1970+
}
1971+
18431972
error = __split_and_process_bio(&ci);
18441973
if (error || !ci.sector_count)
18451974
goto out;

drivers/md/dm.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,9 @@ int dm_blk_report_zones(struct gendisk *disk, sector_t sector,
110110
unsigned int nr_zones, report_zones_cb cb, void *data);
111111
bool dm_is_zone_write(struct mapped_device *md, struct bio *bio);
112112
int dm_zone_map_bio(struct dm_target_io *io);
113+
int dm_zone_get_reset_bitmap(struct mapped_device *md, struct dm_table *t,
114+
sector_t sector, unsigned int nr_zones,
115+
unsigned long *need_reset);
113116
#else
114117
#define dm_blk_report_zones NULL
115118
static inline bool dm_is_zone_write(struct mapped_device *md, struct bio *bio)

include/linux/device-mapper.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -357,6 +357,13 @@ struct dm_target {
357357
*/
358358
bool discards_supported:1;
359359

360+
/*
361+
* Automatically set by dm-core if this target supports
362+
* REQ_OP_ZONE_RESET_ALL. Otherwise, this operation will be emulated
363+
* using REQ_OP_ZONE_RESET. Target drivers must not set this manually.
364+
*/
365+
bool zone_reset_all_supported:1;
366+
360367
/*
361368
* Set if this target requires that discards be split on
362369
* 'max_discard_sectors' boundaries.

0 commit comments

Comments
 (0)