@@ -410,13 +410,14 @@ static bool disk_insert_zone_wplug(struct gendisk *disk,
410
410
}
411
411
}
412
412
hlist_add_head_rcu (& zwplug -> node , & disk -> zone_wplugs_hash [idx ]);
413
+ atomic_inc (& disk -> nr_zone_wplugs );
413
414
spin_unlock_irqrestore (& disk -> zone_wplugs_lock , flags );
414
415
415
416
return true;
416
417
}
417
418
418
- static struct blk_zone_wplug * disk_get_zone_wplug (struct gendisk * disk ,
419
- sector_t sector )
419
+ static struct blk_zone_wplug * disk_get_hashed_zone_wplug (struct gendisk * disk ,
420
+ sector_t sector )
420
421
{
421
422
unsigned int zno = disk_zone_no (disk , sector );
422
423
unsigned int idx = hash_32 (zno , disk -> zone_wplugs_hash_bits );
@@ -437,6 +438,15 @@ static struct blk_zone_wplug *disk_get_zone_wplug(struct gendisk *disk,
437
438
return NULL ;
438
439
}
439
440
441
/*
 * Look up the zone write plug for the zone containing @sector on @disk.
 *
 * Returns NULL right away when disk->nr_zone_wplugs reads as zero, so the
 * hash table is not walked while no plug is hashed. That counter is
 * incremented when a plug is added to disk->zone_wplugs_hash and decremented
 * when a plug is unhashed. Otherwise the result of
 * disk_get_hashed_zone_wplug() is returned unchanged.
 *
 * NOTE(review): the caller added by this patch releases the result with
 * disk_put_zone_wplug(), so the hashed lookup presumably returns the plug
 * with a reference held -- confirm against disk_get_hashed_zone_wplug().
 */
+ static inline struct blk_zone_wplug * disk_get_zone_wplug (struct gendisk * disk ,
442
+ sector_t sector )
443
+ {
444
+ if (!atomic_read (& disk -> nr_zone_wplugs )) /* fast path: no plug is currently hashed */
445
+ return NULL ;
446
+
447
+ return disk_get_hashed_zone_wplug (disk , sector );
448
+ }
449
+
440
450
static void disk_free_zone_wplug_rcu (struct rcu_head * rcu_head )
441
451
{
442
452
struct blk_zone_wplug * zwplug =
@@ -503,6 +513,7 @@ static void disk_remove_zone_wplug(struct gendisk *disk,
503
513
zwplug -> flags |= BLK_ZONE_WPLUG_UNHASHED ;
504
514
spin_lock_irqsave (& disk -> zone_wplugs_lock , flags );
505
515
hlist_del_init_rcu (& zwplug -> node );
516
+ atomic_dec (& disk -> nr_zone_wplugs );
506
517
spin_unlock_irqrestore (& disk -> zone_wplugs_lock , flags );
507
518
disk_put_zone_wplug (zwplug );
508
519
}
@@ -593,6 +604,11 @@ static void disk_zone_wplug_abort(struct blk_zone_wplug *zwplug)
593
604
{
594
605
struct bio * bio ;
595
606
607
+ if (bio_list_empty (& zwplug -> bio_list ))
608
+ return ;
609
+
610
+ pr_warn_ratelimited ("%s: zone %u: Aborting plugged BIOs\n" ,
611
+ zwplug -> disk -> disk_name , zwplug -> zone_no );
596
612
while ((bio = bio_list_pop (& zwplug -> bio_list )))
597
613
blk_zone_wplug_bio_io_error (zwplug , bio );
598
614
}
@@ -1040,6 +1056,47 @@ static bool blk_zone_wplug_handle_write(struct bio *bio, unsigned int nr_segs)
1040
1056
return true;
1041
1057
}
1042
1058
1059
/*
 * Clean up any stale zone write plug before a native zone append BIO is
 * passed through without plugging.
 *
 * Called from blk_zone_plug_bio() for REQ_OP_ZONE_APPEND when the device does
 * not emulate zone append. If a plug exists for the zone targeted by @bio,
 * any BIOs still queued on it are failed and the plug is removed from the
 * disk hash table. Nothing is done when no plug is found.
 */
+ static void blk_zone_wplug_handle_native_zone_append (struct bio * bio )
1060
+ {
1061
+ struct gendisk * disk = bio -> bi_bdev -> bd_disk ;
1062
+ struct blk_zone_wplug * zwplug ;
1063
+ unsigned long flags ;
1064
+
1065
+ /*
1066
+ * We have native support for zone append operations, so we are not
1067
+ * going to handle @bio through plugging. However, we may already have a
1068
+ * zone write plug for the target zone if that zone was previously
1069
+ * partially written using regular writes. In such a case, we risk leaving
1070
+ * the plug in the disk hash table if the zone is fully written using
1071
+ * zone append operations. Avoid this by removing the zone write plug.
1072
+ */
1073
+ zwplug = disk_get_zone_wplug (disk , bio -> bi_iter .bi_sector );
1074
+ if (likely (!zwplug ))
1075
+ return ; /* common case: no plug for this zone, nothing to clean up */
1076
+
1077
+ spin_lock_irqsave (& zwplug -> lock , flags );
1078
+
1079
+ /*
1080
+ * We are about to remove the zone write plug. But if the user
1081
+ * (mistakenly) has issued regular writes together with native zone
1082
+ * append, we must abort the writes as otherwise the plugged BIOs would
1083
+ * not be executed by the plug BIO work as disk_get_zone_wplug() will
1084
+ * return NULL after the plug is removed. Aborting the plugged write
1085
+ * BIOs is consistent with the fact that these writes will most likely
1086
+ * fail anyway as there are no ordering guarantees between zone append
1087
+ * operations and regular write operations.
1088
+ */
1089
+ if (!bio_list_empty (& zwplug -> bio_list )) {
1090
+ pr_warn_ratelimited ("%s: zone %u: Invalid mix of zone append and regular writes\n" ,
1091
+ disk -> disk_name , zwplug -> zone_no );
1092
+ disk_zone_wplug_abort (zwplug );
1093
+ }
1094
+ disk_remove_zone_wplug (disk , zwplug ); /* unhashes the plug; it also does its own disk_put_zone_wplug() */
1095
+ spin_unlock_irqrestore (& zwplug -> lock , flags );
1096
+
1097
+ disk_put_zone_wplug (zwplug ); /* NOTE(review): presumably drops the reference taken by the lookup above -- confirm */
1098
+ }
1099
+
1043
1100
/**
1044
1101
* blk_zone_plug_bio - Handle a zone write BIO with zone write plugging
1045
1102
* @bio: The BIO being submitted
@@ -1096,8 +1153,10 @@ bool blk_zone_plug_bio(struct bio *bio, unsigned int nr_segs)
1096
1153
*/
1097
1154
switch (bio_op (bio )) {
1098
1155
case REQ_OP_ZONE_APPEND :
1099
- if (!bdev_emulates_zone_append (bdev ))
1156
+ if (!bdev_emulates_zone_append (bdev )) {
1157
+ blk_zone_wplug_handle_native_zone_append (bio );
1100
1158
return false;
1159
+ }
1101
1160
fallthrough ;
1102
1161
case REQ_OP_WRITE :
1103
1162
case REQ_OP_WRITE_ZEROES :
@@ -1284,6 +1343,7 @@ static int disk_alloc_zone_resources(struct gendisk *disk,
1284
1343
{
1285
1344
unsigned int i ;
1286
1345
1346
+ atomic_set (& disk -> nr_zone_wplugs , 0 );
1287
1347
disk -> zone_wplugs_hash_bits =
1288
1348
min (ilog2 (pool_size ) + 1 , BLK_ZONE_WPLUG_MAX_HASH_BITS );
1289
1349
@@ -1338,6 +1398,7 @@ static void disk_destroy_zone_wplugs_hash_table(struct gendisk *disk)
1338
1398
}
1339
1399
}
1340
1400
1401
+ WARN_ON_ONCE (atomic_read (& disk -> nr_zone_wplugs ));
1341
1402
kfree (disk -> zone_wplugs_hash );
1342
1403
disk -> zone_wplugs_hash = NULL ;
1343
1404
disk -> zone_wplugs_hash_bits = 0 ;
@@ -1550,11 +1611,12 @@ static int blk_revalidate_seq_zone(struct blk_zone *zone, unsigned int idx,
1550
1611
}
1551
1612
1552
1613
/*
1553
- * We need to track the write pointer of all zones that are not
1554
- * empty nor full. So make sure we have a zone write plug for
1555
- * such zone if the device has a zone write plug hash table.
1614
+ * If the device needs zone append emulation, we need to track the
1615
+ * write pointer of all zones that are neither empty nor full. So make sure
1616
+ * we have a zone write plug for such zone if the device has a zone
1617
+ * write plug hash table.
1556
1618
*/
1557
- if (!disk -> zone_wplugs_hash )
1619
+ if (!queue_emulates_zone_append ( disk -> queue ) || ! disk -> zone_wplugs_hash )
1558
1620
return 0 ;
1559
1621
1560
1622
disk_zone_wplug_sync_wp_offset (disk , zone );
0 commit comments