Skip to content

Commit 031a3d7

Browse files
committed
Merge branch 'for-6.11/block' into for-next
* for-6.11/block: dm: Remove unused macro DM_ZONE_INVALID_WP_OFST; dm: Improve zone resource limits handling; dm: Call dm_revalidate_zones() after setting the queue limits; block: Improve checks on zone resource limits
2 parents c579de8 + eaa3706 commit 031a3d7

File tree

5 files changed

+195
-54
lines changed

5 files changed

+195
-54
lines changed

block/blk-settings.c

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,14 @@ static int blk_validate_zoned_limits(struct queue_limits *lim)
8080
if (WARN_ON_ONCE(!IS_ENABLED(CONFIG_BLK_DEV_ZONED)))
8181
return -EINVAL;
8282

83+
/*
84+
* Given that active zones include open zones, the maximum number of
85+
* open zones cannot be larger than the maximum number of active zones.
86+
*/
87+
if (lim->max_active_zones &&
88+
lim->max_open_zones > lim->max_active_zones)
89+
return -EINVAL;
90+
8391
if (lim->zone_write_granularity < lim->logical_block_size)
8492
lim->zone_write_granularity = lim->logical_block_size;
8593

block/blk-zoned.c

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1647,8 +1647,22 @@ static int disk_update_zone_resources(struct gendisk *disk,
16471647
return -ENODEV;
16481648
}
16491649

1650+
lim = queue_limits_start_update(q);
1651+
1652+
/*
1653+
* Some devices can advertise zone resource limits that are larger than
1654+
* the number of sequential zones of the zoned block device, e.g. a
1655+
* small ZNS namespace. In such a case, assume that the zoned device has
1656+
* no zone resource limits.
1657+
*/
1658+
nr_seq_zones = disk->nr_zones - nr_conv_zones;
1659+
if (lim.max_open_zones >= nr_seq_zones)
1660+
lim.max_open_zones = 0;
1661+
if (lim.max_active_zones >= nr_seq_zones)
1662+
lim.max_active_zones = 0;
1663+
16501664
if (!disk->zone_wplugs_pool)
1651-
return 0;
1665+
goto commit;
16521666

16531667
/*
16541668
* If the device has no limit on the maximum number of open and active
@@ -1657,9 +1671,6 @@ static int disk_update_zone_resources(struct gendisk *disk,
16571671
* dynamic zone write plug allocation when simultaneously writing to
16581672
* more zones than the size of the mempool.
16591673
*/
1660-
lim = queue_limits_start_update(q);
1661-
1662-
nr_seq_zones = disk->nr_zones - nr_conv_zones;
16631674
pool_size = max(lim.max_open_zones, lim.max_active_zones);
16641675
if (!pool_size)
16651676
pool_size = min(BLK_ZONE_WPLUG_DEFAULT_POOL_SIZE, nr_seq_zones);
@@ -1673,6 +1684,7 @@ static int disk_update_zone_resources(struct gendisk *disk,
16731684
lim.max_open_zones = 0;
16741685
}
16751686

1687+
commit:
16761688
return queue_limits_commit_update(q, &lim);
16771689
}
16781690

drivers/md/dm-table.c

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1921,10 +1921,7 @@ int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
19211921
dm_table_any_dev_attr(t, device_is_not_random, NULL))
19221922
blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, q);
19231923

1924-
/*
1925-
* For a zoned target, setup the zones related queue attributes
1926-
* and resources necessary for zone append emulation if necessary.
1927-
*/
1924+
/* For a zoned table, setup the zone related queue attributes. */
19281925
if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) && limits->zoned) {
19291926
r = dm_set_zones_restrictions(t, q, limits);
19301927
if (r)
@@ -1935,6 +1932,16 @@ int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
19351932
if (r)
19361933
return r;
19371934

1935+
/*
1936+
* Now that the limits are set, check the zones mapped by the table
1937+
* and setup the resources for zone append emulation if necessary.
1938+
*/
1939+
if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) && limits->zoned) {
1940+
r = dm_revalidate_zones(t, q);
1941+
if (r)
1942+
return r;
1943+
}
1944+
19381945
dm_update_crypto_profile(q, t);
19391946

19401947
/*

drivers/md/dm-zone.c

Lines changed: 159 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,6 @@
1313

1414
#define DM_MSG_PREFIX "zone"
1515

16-
#define DM_ZONE_INVALID_WP_OFST UINT_MAX
17-
1816
/*
1917
* For internal zone reports bypassing the top BIO submission path.
2018
*/
@@ -145,35 +143,28 @@ bool dm_is_zone_write(struct mapped_device *md, struct bio *bio)
145143
}
146144
}
147145

148-
/*
149-
* Count conventional zones of a mapped zoned device. If the device
150-
* only has conventional zones, do not expose it as zoned.
151-
*/
152-
static int dm_check_zoned_cb(struct blk_zone *zone, unsigned int idx,
153-
void *data)
154-
{
155-
unsigned int *nr_conv_zones = data;
156-
157-
if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL)
158-
(*nr_conv_zones)++;
159-
160-
return 0;
161-
}
162-
163146
/*
164147
* Revalidate the zones of a mapped device to initialize resource necessary
165148
* for zone append emulation. Note that we cannot simply use the block layer
166149
* blk_revalidate_disk_zones() function here as the mapped device is suspended
167150
* (this is called from __bind() context).
168151
*/
169-
static int dm_revalidate_zones(struct mapped_device *md, struct dm_table *t)
152+
int dm_revalidate_zones(struct dm_table *t, struct request_queue *q)
170153
{
154+
struct mapped_device *md = t->md;
171155
struct gendisk *disk = md->disk;
172156
int ret;
173157

158+
if (!get_capacity(disk))
159+
return 0;
160+
174161
/* Revalidate only if something changed. */
175-
if (!disk->nr_zones || disk->nr_zones != md->nr_zones)
162+
if (!disk->nr_zones || disk->nr_zones != md->nr_zones) {
163+
DMINFO("%s using %s zone append",
164+
disk->disk_name,
165+
queue_emulates_zone_append(q) ? "emulated" : "native");
176166
md->nr_zones = 0;
167+
}
177168

178169
if (md->nr_zones)
179170
return 0;
@@ -220,13 +211,127 @@ static bool dm_table_supports_zone_append(struct dm_table *t)
220211
return true;
221212
}
222213

214+
struct dm_device_zone_count {
215+
sector_t start;
216+
sector_t len;
217+
unsigned int total_nr_seq_zones;
218+
unsigned int target_nr_seq_zones;
219+
};
220+
221+
/*
222+
* Count the total number of and the number of mapped sequential zones of a
223+
* target zoned device.
224+
*/
225+
static int dm_device_count_zones_cb(struct blk_zone *zone,
226+
unsigned int idx, void *data)
227+
{
228+
struct dm_device_zone_count *zc = data;
229+
230+
if (zone->type != BLK_ZONE_TYPE_CONVENTIONAL) {
231+
zc->total_nr_seq_zones++;
232+
if (zone->start >= zc->start &&
233+
zone->start < zc->start + zc->len)
234+
zc->target_nr_seq_zones++;
235+
}
236+
237+
return 0;
238+
}
239+
240+
static int dm_device_count_zones(struct dm_dev *dev,
241+
struct dm_device_zone_count *zc)
242+
{
243+
int ret;
244+
245+
ret = blkdev_report_zones(dev->bdev, 0, BLK_ALL_ZONES,
246+
dm_device_count_zones_cb, zc);
247+
if (ret < 0)
248+
return ret;
249+
if (!ret)
250+
return -EIO;
251+
return 0;
252+
}
253+
254+
struct dm_zone_resource_limits {
255+
unsigned int mapped_nr_seq_zones;
256+
struct queue_limits *lim;
257+
bool reliable_limits;
258+
};
259+
260+
static int device_get_zone_resource_limits(struct dm_target *ti,
261+
struct dm_dev *dev, sector_t start,
262+
sector_t len, void *data)
263+
{
264+
struct dm_zone_resource_limits *zlim = data;
265+
struct gendisk *disk = dev->bdev->bd_disk;
266+
unsigned int max_open_zones, max_active_zones;
267+
int ret;
268+
struct dm_device_zone_count zc = {
269+
.start = start,
270+
.len = len,
271+
};
272+
273+
/*
274+
* If the target is not the whole device, the device zone resources may
275+
* be shared between different targets. Check this by counting the
276+
* number of mapped sequential zones: if this number is smaller than the
277+
* total number of sequential zones of the target device, then resource
278+
* sharing may happen and the zone limits will not be reliable.
279+
*/
280+
ret = dm_device_count_zones(dev, &zc);
281+
if (ret) {
282+
DMERR("Count %s zones failed %d", disk->disk_name, ret);
283+
return ret;
284+
}
285+
286+
/*
287+
* If the target does not map any sequential zones, then we do not need
288+
* any zone resource limits.
289+
*/
290+
if (!zc.target_nr_seq_zones)
291+
return 0;
292+
293+
/*
294+
* If the target does not map all sequential zones, the limits
295+
* will not be reliable.
296+
*/
297+
if (zc.target_nr_seq_zones < zc.total_nr_seq_zones)
298+
zlim->reliable_limits = false;
299+
300+
/*
301+
* If the target maps fewer sequential zones than the limit values, then
302+
* we do not have limits for this target.
303+
*/
304+
max_active_zones = disk->queue->limits.max_active_zones;
305+
if (max_active_zones >= zc.target_nr_seq_zones)
306+
max_active_zones = 0;
307+
zlim->lim->max_active_zones =
308+
min_not_zero(max_active_zones, zlim->lim->max_active_zones);
309+
310+
max_open_zones = disk->queue->limits.max_open_zones;
311+
if (max_open_zones >= zc.target_nr_seq_zones)
312+
max_open_zones = 0;
313+
zlim->lim->max_open_zones =
314+
min_not_zero(max_open_zones, zlim->lim->max_open_zones);
315+
316+
/*
317+
* Also count the total number of sequential zones for the mapped
318+
* device so that when we are done inspecting all its targets, we are
319+
* able to check if the mapped device actually has any sequential zones.
320+
*/
321+
zlim->mapped_nr_seq_zones += zc.target_nr_seq_zones;
322+
323+
return 0;
324+
}
325+
223326
int dm_set_zones_restrictions(struct dm_table *t, struct request_queue *q,
224327
struct queue_limits *lim)
225328
{
226329
struct mapped_device *md = t->md;
227330
struct gendisk *disk = md->disk;
228-
unsigned int nr_conv_zones = 0;
229-
int ret;
331+
struct dm_zone_resource_limits zlim = {
332+
.reliable_limits = true,
333+
.lim = lim,
334+
};
230335

231336
/*
232337
* Check if zone append is natively supported, and if not, set the
@@ -240,46 +345,54 @@ int dm_set_zones_restrictions(struct dm_table *t, struct request_queue *q,
240345
lim->max_zone_append_sectors = 0;
241346
}
242347

243-
if (!get_capacity(md->disk))
244-
return 0;
245-
246348
/*
247-
* Count conventional zones to check that the mapped device will indeed
248-
* have sequential write required zones.
349+
* Determine the max open and max active zone limits for the mapped
350+
* device by inspecting the zone resource limits and the zones mapped
351+
* by each target.
249352
*/
250-
md->zone_revalidate_map = t;
251-
ret = dm_blk_report_zones(disk, 0, UINT_MAX,
252-
dm_check_zoned_cb, &nr_conv_zones);
253-
md->zone_revalidate_map = NULL;
254-
if (ret < 0) {
255-
DMERR("Check zoned failed %d", ret);
256-
return ret;
353+
for (unsigned int i = 0; i < t->num_targets; i++) {
354+
struct dm_target *ti = dm_table_get_target(t, i);
355+
356+
if (!ti->type->iterate_devices ||
357+
ti->type->iterate_devices(ti,
358+
device_get_zone_resource_limits, &zlim)) {
359+
DMERR("Could not determine %s zone resource limits",
360+
disk->disk_name);
361+
return -ENODEV;
362+
}
257363
}
258364

259365
/*
260-
* If we only have conventional zones, expose the mapped device as
261-
* a regular device.
366+
* If we only have conventional zones mapped, expose the mapped device
367+
* as a regular device.
262368
*/
263-
if (nr_conv_zones >= ret) {
369+
if (!zlim.mapped_nr_seq_zones) {
264370
lim->max_open_zones = 0;
265371
lim->max_active_zones = 0;
372+
lim->max_zone_append_sectors = 0;
373+
lim->zone_write_granularity = 0;
374+
lim->chunk_sectors = 0;
266375
lim->zoned = false;
267376
clear_bit(DMF_EMULATE_ZONE_APPEND, &md->flags);
377+
md->nr_zones = 0;
268378
disk->nr_zones = 0;
269379
return 0;
270380
}
271381

272-
if (!md->disk->nr_zones) {
273-
DMINFO("%s using %s zone append",
274-
md->disk->disk_name,
275-
queue_emulates_zone_append(q) ? "emulated" : "native");
276-
}
277-
278-
ret = dm_revalidate_zones(md, t);
279-
if (ret < 0)
280-
return ret;
382+
/*
383+
* Warn once (when the capacity is not yet set) if the mapped device is
384+
* partially using zone resources of the target devices as that leads to
385+
* unreliable limits, i.e. if another mapped device uses the same
386+
* underlying devices, we cannot enforce zone limits to guarantee that
387+
* writing will not lead to errors. Note that we really should return
388+
* an error in such a case but there is no easy way to find out if
389+
* another mapped device uses the same underlying zoned devices.
390+
*/
391+
if (!get_capacity(disk) && !zlim.reliable_limits)
392+
DMWARN("%s zone resource limits may be unreliable",
393+
disk->disk_name);
281394

282-
if (!static_key_enabled(&zoned_enabled.key))
395+
if (lim->zoned && !static_key_enabled(&zoned_enabled.key))
283396
static_branch_enable(&zoned_enabled);
284397
return 0;
285398
}

drivers/md/dm.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,7 @@ int dm_setup_md_queue(struct mapped_device *md, struct dm_table *t);
103103
*/
104104
int dm_set_zones_restrictions(struct dm_table *t, struct request_queue *q,
105105
struct queue_limits *lim);
106+
int dm_revalidate_zones(struct dm_table *t, struct request_queue *q);
106107
void dm_zone_endio(struct dm_io *io, struct bio *clone);
107108
#ifdef CONFIG_BLK_DEV_ZONED
108109
int dm_blk_report_zones(struct gendisk *disk, sector_t sector,

0 commit comments

Comments
 (0)