 #define DM_MSG_PREFIX "zone"
 
-#define DM_ZONE_INVALID_WP_OFST	UINT_MAX
-
 /*
  * For internal zone reports bypassing the top BIO submission path.
  */
@@ -145,35 +143,28 @@ bool dm_is_zone_write(struct mapped_device *md, struct bio *bio)
 	}
 }
 
-/*
- * Count conventional zones of a mapped zoned device. If the device
- * only has conventional zones, do not expose it as zoned.
- */
-static int dm_check_zoned_cb(struct blk_zone *zone, unsigned int idx,
-			     void *data)
-{
-	unsigned int *nr_conv_zones = data;
-
-	if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL)
-		(*nr_conv_zones)++;
-
-	return 0;
-}
-
 /*
  * Revalidate the zones of a mapped device to initialize resources necessary
  * for zone append emulation. Note that we cannot simply use the block layer
  * blk_revalidate_disk_zones() function here as the mapped device is suspended
  * (this is called from __bind() context).
  */
-static int dm_revalidate_zones(struct mapped_device *md, struct dm_table *t)
+int dm_revalidate_zones(struct dm_table *t, struct request_queue *q)
 {
+	struct mapped_device *md = t->md;
 	struct gendisk *disk = md->disk;
 	int ret;
 
+	if (!get_capacity(disk))
+		return 0;
+
 	/* Revalidate only if something changed. */
-	if (!disk->nr_zones || disk->nr_zones != md->nr_zones)
+	if (!disk->nr_zones || disk->nr_zones != md->nr_zones) {
+		DMINFO("%s using %s zone append",
+		       disk->disk_name,
+		       queue_emulates_zone_append(q) ? "emulated" : "native");
 		md->nr_zones = 0;
+	}
 
 	if (md->nr_zones)
 		return 0;
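
The reworked dm_revalidate_zones() now takes the table and request queue, derives the mapped_device from t->md, returns early while the capacity is not yet set, and logs whether zone append is emulated or native whenever the zone count changes. The block layer helper queue_emulates_zone_append() used by the DMINFO line is not shown in this diff; as a minimal sketch, assuming it reduces to "zoned queue with no native zone append limit":

/*
 * Illustrative assumption only: a zoned queue that advertises no native
 * max_zone_append_sectors limit needs REQ_OP_ZONE_APPEND emulation,
 * which DM implements using regular writes.
 */
static inline bool example_emulates_zone_append(struct request_queue *q)
{
	return blk_queue_is_zoned(q) && !q->limits.max_zone_append_sectors;
}
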
@@ -220,13 +211,127 @@ static bool dm_table_supports_zone_append(struct dm_table *t)
 	return true;
 }
 
+struct dm_device_zone_count {
+	sector_t start;
+	sector_t len;
+	unsigned int total_nr_seq_zones;
+	unsigned int target_nr_seq_zones;
+};
+
+/*
+ * Count the total number of, and the number of mapped, sequential zones
+ * of a target zoned device.
+ */
+static int dm_device_count_zones_cb(struct blk_zone *zone,
+				    unsigned int idx, void *data)
+{
+	struct dm_device_zone_count *zc = data;
+
+	if (zone->type != BLK_ZONE_TYPE_CONVENTIONAL) {
+		zc->total_nr_seq_zones++;
+		if (zone->start >= zc->start &&
+		    zone->start < zc->start + zc->len)
+			zc->target_nr_seq_zones++;
+	}
+
+	return 0;
+}
+
+static int dm_device_count_zones(struct dm_dev *dev,
+				 struct dm_device_zone_count *zc)
+{
+	int ret;
+
+	ret = blkdev_report_zones(dev->bdev, 0, BLK_ALL_ZONES,
+				  dm_device_count_zones_cb, zc);
+	if (ret < 0)
+		return ret;
+	if (!ret)
+		return -EIO;
+	return 0;
+}
+
+struct dm_zone_resource_limits {
+	unsigned int mapped_nr_seq_zones;
+	struct queue_limits *lim;
+	bool reliable_limits;
+};
+
+static int device_get_zone_resource_limits(struct dm_target *ti,
+					   struct dm_dev *dev, sector_t start,
+					   sector_t len, void *data)
+{
+	struct dm_zone_resource_limits *zlim = data;
+	struct gendisk *disk = dev->bdev->bd_disk;
+	unsigned int max_open_zones, max_active_zones;
+	int ret;
+	struct dm_device_zone_count zc = {
+		.start = start,
+		.len = len,
+	};
+
+	/*
+	 * If the target is not the whole device, the device zone resources may
+	 * be shared between different targets. Check this by counting the
+	 * number of mapped sequential zones: if this number is smaller than the
+	 * total number of sequential zones of the target device, then resource
+	 * sharing may happen and the zone limits will not be reliable.
+	 */
+	ret = dm_device_count_zones(dev, &zc);
+	if (ret) {
+		DMERR("Count %s zones failed %d", disk->disk_name, ret);
+		return ret;
+	}
+
+	/*
+	 * If the target does not map any sequential zones, then we do not need
+	 * any zone resource limits.
+	 */
+	if (!zc.target_nr_seq_zones)
+		return 0;
+
+	/*
+	 * If the target does not map all sequential zones, the limits
+	 * will not be reliable.
+	 */
+	if (zc.target_nr_seq_zones < zc.total_nr_seq_zones)
+		zlim->reliable_limits = false;
+
+	/*
+	 * If the target maps fewer sequential zones than the limit values, then
+	 * we do not have limits for this target.
+	 */
+	max_active_zones = disk->queue->limits.max_active_zones;
+	if (max_active_zones >= zc.target_nr_seq_zones)
+		max_active_zones = 0;
+	zlim->lim->max_active_zones =
+		min_not_zero(max_active_zones, zlim->lim->max_active_zones);
+
+	max_open_zones = disk->queue->limits.max_open_zones;
+	if (max_open_zones >= zc.target_nr_seq_zones)
+		max_open_zones = 0;
+	zlim->lim->max_open_zones =
+		min_not_zero(max_open_zones, zlim->lim->max_open_zones);
+
+	/*
+	 * Also count the total number of sequential zones for the mapped
+	 * device so that when we are done inspecting all its targets, we are
+	 * able to check if the mapped device actually has any sequential zones.
+	 */
+	zlim->mapped_nr_seq_zones += zc.target_nr_seq_zones;
+
+	return 0;
+}
+
 int dm_set_zones_restrictions(struct dm_table *t, struct request_queue *q,
 			      struct queue_limits *lim)
 {
 	struct mapped_device *md = t->md;
 	struct gendisk *disk = md->disk;
-	unsigned int nr_conv_zones = 0;
-	int ret;
+	struct dm_zone_resource_limits zlim = {
+		.reliable_limits = true,
+		.lim = lim,
+	};
 
 	/*
 	 * Check if zone append is natively supported, and if not, set the
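
The limit stacking in device_get_zone_resource_limits() hinges on min_not_zero(): a target mapping at least as many sequential zones as a device's open/active limit contributes no constraint (its value is zeroed, meaning "unlimited"), while genuine constraints from several underlying devices combine to the smallest non-zero value. A standalone sketch of that accumulation, using hypothetical per-device limits:

#include <stdio.h>

/* Userspace stand-in for the kernel's min_not_zero(): smallest non-zero value. */
static unsigned int min_not_zero(unsigned int x, unsigned int y)
{
	if (!x)
		return y;
	if (!y)
		return x;
	return x < y ? x : y;
}

int main(void)
{
	/* Hypothetical max_open_zones values seen while iterating targets (0 = no limit). */
	unsigned int per_device[] = { 0, 128, 64 };
	unsigned int stacked = 0;

	for (int i = 0; i < 3; i++)
		stacked = min_not_zero(per_device[i], stacked);

	printf("stacked max_open_zones = %u\n", stacked); /* prints 64 */
	return 0;
}

Treating 0 as "no limit" is what makes it safe to zero out a device limit the target cannot exhaust: that device drops out of the min without loosening the limits contributed by the others.
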
@@ -240,46 +345,54 @@ int dm_set_zones_restrictions(struct dm_table *t, struct request_queue *q,
 		lim->max_zone_append_sectors = 0;
 	}
 
-	if (!get_capacity(md->disk))
-		return 0;
-
 	/*
-	 * Count conventional zones to check that the mapped device will indeed
-	 * have sequential write required zones.
+	 * Determine the max open and max active zone limits for the mapped
+	 * device by inspecting the zone resource limits and the zones mapped
+	 * by each target.
 	 */
-	md->zone_revalidate_map = t;
-	ret = dm_blk_report_zones(disk, 0, UINT_MAX,
-				  dm_check_zoned_cb, &nr_conv_zones);
-	md->zone_revalidate_map = NULL;
-	if (ret < 0) {
-		DMERR("Check zoned failed %d", ret);
-		return ret;
+	for (unsigned int i = 0; i < t->num_targets; i++) {
+		struct dm_target *ti = dm_table_get_target(t, i);
+
+		if (!ti->type->iterate_devices ||
+		    ti->type->iterate_devices(ti,
+				device_get_zone_resource_limits, &zlim)) {
+			DMERR("Could not determine %s zone resource limits",
+			      disk->disk_name);
+			return -ENODEV;
+		}
 	}
 
 	/*
-	 * If we only have conventional zones, expose the mapped device as
-	 * a regular device.
+	 * If we only have conventional zones mapped, expose the mapped device
+	 * as a regular device.
 	 */
-	if (nr_conv_zones >= ret) {
+	if (!zlim.mapped_nr_seq_zones) {
 		lim->max_open_zones = 0;
 		lim->max_active_zones = 0;
+		lim->max_zone_append_sectors = 0;
+		lim->zone_write_granularity = 0;
+		lim->chunk_sectors = 0;
 		lim->zoned = false;
 		clear_bit(DMF_EMULATE_ZONE_APPEND, &md->flags);
+		md->nr_zones = 0;
 		disk->nr_zones = 0;
 		return 0;
 	}
 
-	if (!md->disk->nr_zones) {
-		DMINFO("%s using %s zone append",
-		       md->disk->disk_name,
-		       queue_emulates_zone_append(q) ? "emulated" : "native");
-	}
-
-	ret = dm_revalidate_zones(md, t);
-	if (ret < 0)
-		return ret;
+	/*
+	 * Warn once (when the capacity is not yet set) if the mapped device is
+	 * partially using zone resources of the target devices, as that leads
+	 * to unreliable limits, i.e. if another mapped device uses the same
+	 * underlying devices, we cannot enforce zone limits to guarantee that
+	 * writing will not lead to errors. Note that we really should return
+	 * an error for such a case but there is no easy way to find out if
+	 * another mapped device uses the same underlying zoned devices.
+	 */
+	if (!get_capacity(disk) && !zlim.reliable_limits)
+		DMWARN("%s zone resource limits may be unreliable",
+		       disk->disk_name);
 
-	if (!static_key_enabled(&zoned_enabled.key))
+	if (lim->zoned && !static_key_enabled(&zoned_enabled.key))
 		static_branch_enable(&zoned_enabled);
 	return 0;
 }
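
The DMWARN added above fires when a target maps only part of a device's sequential zones, since another user of the same device could then consume open or active zone resources behind DM's back. The userspace-style sketch below replays the counting logic of dm_device_count_zones_cb() on a hypothetical four-zone device whose target maps only two of the three sequential zones, showing how the mapped count falling short of the total is what clears reliable_limits:

#include <stdio.h>
#include <stdbool.h>

/* Simplified stand-ins for the kernel types; illustration only. */
typedef unsigned long long sector_t;
struct zone { sector_t start; bool conventional; };

int main(void)
{
	/* Hypothetical device: four 256-sector zones, the first conventional. */
	struct zone zones[] = {
		{ 0, true }, { 256, false }, { 512, false }, { 768, false },
	};
	/* Hypothetical target mapping sectors [256, 768) only. */
	sector_t start = 256, len = 512;
	unsigned int total_seq = 0, target_seq = 0;

	for (int i = 0; i < 4; i++) {
		if (zones[i].conventional)
			continue;
		total_seq++; /* every sequential zone on the device */
		if (zones[i].start >= start && zones[i].start < start + len)
			target_seq++; /* only the zones the target maps */
	}

	/* Prints total=3 mapped=2 reliable=no: resource sharing is possible. */
	printf("total=%u mapped=%u reliable=%s\n", total_seq, target_seq,
	       target_seq < total_seq ? "no" : "yes");
	return 0;
}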