Skip to content

Commit b465ae7

Browse files
Ming Lei, axboe
authored and committed
ublk: add feature UBLK_F_QUIESCE
Add feature UBLK_F_QUIESCE, which adds the control command `UBLK_U_CMD_QUIESCE_DEV` for quiescing a device; with ublk server cooperation, the device state then finally becomes `UBLK_S_DEV_QUIESCED` or `UBLK_S_DEV_FAIL_IO` from ublk_ch_release(). This feature helps support upgrading the ublk server application by shutting down the ublk server gracefully while keeping the ublk block device persistent during the upgrade period. The feature is only available when UBLK_F_USER_RECOVERY is enabled. Suggested-by: Yoav Cohen <[email protected]> Link: https://lore.kernel.org/linux-block/DM4PR12MB632807AB7CDCE77D1E5AB7D0A9B92@DM4PR12MB6328.namprd12.prod.outlook.com/ Signed-off-by: Ming Lei <[email protected]> Link: https://lore.kernel.org/r/[email protected] Signed-off-by: Jens Axboe <[email protected]>
1 parent f40b1f2 commit b465ae7

File tree

2 files changed

+142
-1
lines changed

2 files changed

+142
-1
lines changed

drivers/block/ublk_drv.c

Lines changed: 123 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@
5151
/* private ioctl command mirror */
5252
#define UBLK_CMD_DEL_DEV_ASYNC _IOC_NR(UBLK_U_CMD_DEL_DEV_ASYNC)
5353
#define UBLK_CMD_UPDATE_SIZE _IOC_NR(UBLK_U_CMD_UPDATE_SIZE)
54+
#define UBLK_CMD_QUIESCE_DEV _IOC_NR(UBLK_U_CMD_QUIESCE_DEV)
5455

5556
#define UBLK_IO_REGISTER_IO_BUF _IOC_NR(UBLK_U_IO_REGISTER_IO_BUF)
5657
#define UBLK_IO_UNREGISTER_IO_BUF _IOC_NR(UBLK_U_IO_UNREGISTER_IO_BUF)
@@ -67,7 +68,8 @@
6768
| UBLK_F_ZONED \
6869
| UBLK_F_USER_RECOVERY_FAIL_IO \
6970
| UBLK_F_UPDATE_SIZE \
70-
| UBLK_F_AUTO_BUF_REG)
71+
| UBLK_F_AUTO_BUF_REG \
72+
| UBLK_F_QUIESCE)
7173

7274
#define UBLK_F_ALL_RECOVERY_FLAGS (UBLK_F_USER_RECOVERY \
7375
| UBLK_F_USER_RECOVERY_REISSUE \
@@ -2841,6 +2843,11 @@ static int ublk_ctrl_add_dev(const struct ublksrv_ctrl_cmd *header)
28412843
return -EINVAL;
28422844
}
28432845

2846+
if ((info.flags & UBLK_F_QUIESCE) && !(info.flags & UBLK_F_USER_RECOVERY)) {
2847+
pr_warn("UBLK_F_QUIESCE requires UBLK_F_USER_RECOVERY\n");
2848+
return -EINVAL;
2849+
}
2850+
28442851
/*
28452852
* unprivileged device can't be trusted, but RECOVERY and
28462853
* RECOVERY_REISSUE still may hang error handling, so can't
@@ -3233,6 +3240,117 @@ static void ublk_ctrl_set_size(struct ublk_device *ub, const struct ublksrv_ctrl
32333240
set_capacity_and_notify(ub->ub_disk, p->dev_sectors);
32343241
mutex_unlock(&ub->mutex);
32353242
}
3243+
3244+
/*
 * Accumulator handed to ublk_count_busy_req() through the opaque
 * callback argument of blk_mq_tagset_busy_iter().
 */
struct count_busy {
	/* queue whose requests are being counted; others are ignored */
	const struct ublk_queue *ubq;
	/* number of matching requests seen so far */
	unsigned int nr_busy;
};
3248+
3249+
static bool ublk_count_busy_req(struct request *rq, void *data)
3250+
{
3251+
struct count_busy *idle = data;
3252+
3253+
if (!blk_mq_request_started(rq) && rq->mq_hctx->driver_data == idle->ubq)
3254+
idle->nr_busy += 1;
3255+
return true;
3256+
}
3257+
3258+
/* uring_cmd is guaranteed to be active if the associated request is idle */
3259+
static bool ubq_has_idle_io(const struct ublk_queue *ubq)
3260+
{
3261+
struct count_busy data = {
3262+
.ubq = ubq,
3263+
};
3264+
3265+
blk_mq_tagset_busy_iter(&ubq->dev->tag_set, ublk_count_busy_req, &data);
3266+
return data.nr_busy < ubq->q_depth;
3267+
}
3268+
3269+
/* Wait until each hw queue has at least one idle IO */
3270+
static int ublk_wait_for_idle_io(struct ublk_device *ub,
3271+
unsigned int timeout_ms)
3272+
{
3273+
unsigned int elapsed = 0;
3274+
int ret;
3275+
3276+
while (elapsed < timeout_ms && !signal_pending(current)) {
3277+
unsigned int queues_cancelable = 0;
3278+
int i;
3279+
3280+
for (i = 0; i < ub->dev_info.nr_hw_queues; i++) {
3281+
struct ublk_queue *ubq = ublk_get_queue(ub, i);
3282+
3283+
queues_cancelable += !!ubq_has_idle_io(ubq);
3284+
}
3285+
3286+
/*
3287+
* Each queue needs at least one active command for
3288+
* notifying ublk server
3289+
*/
3290+
if (queues_cancelable == ub->dev_info.nr_hw_queues)
3291+
break;
3292+
3293+
msleep(UBLK_REQUEUE_DELAY_MS);
3294+
elapsed += UBLK_REQUEUE_DELAY_MS;
3295+
}
3296+
3297+
if (signal_pending(current))
3298+
ret = -EINTR;
3299+
else if (elapsed >= timeout_ms)
3300+
ret = -EBUSY;
3301+
else
3302+
ret = 0;
3303+
3304+
return ret;
3305+
}
3306+
3307+
static int ublk_ctrl_quiesce_dev(struct ublk_device *ub,
3308+
const struct ublksrv_ctrl_cmd *header)
3309+
{
3310+
/* zero means wait forever */
3311+
u64 timeout_ms = header->data[0];
3312+
struct gendisk *disk;
3313+
int i, ret = -ENODEV;
3314+
3315+
if (!(ub->dev_info.flags & UBLK_F_QUIESCE))
3316+
return -EOPNOTSUPP;
3317+
3318+
mutex_lock(&ub->mutex);
3319+
disk = ublk_get_disk(ub);
3320+
if (!disk)
3321+
goto unlock;
3322+
if (ub->dev_info.state == UBLK_S_DEV_DEAD)
3323+
goto put_disk;
3324+
3325+
ret = 0;
3326+
/* already in expected state */
3327+
if (ub->dev_info.state != UBLK_S_DEV_LIVE)
3328+
goto put_disk;
3329+
3330+
/* Mark all queues as canceling */
3331+
blk_mq_quiesce_queue(disk->queue);
3332+
for (i = 0; i < ub->dev_info.nr_hw_queues; i++) {
3333+
struct ublk_queue *ubq = ublk_get_queue(ub, i);
3334+
3335+
ubq->canceling = true;
3336+
}
3337+
blk_mq_unquiesce_queue(disk->queue);
3338+
3339+
if (!timeout_ms)
3340+
timeout_ms = UINT_MAX;
3341+
ret = ublk_wait_for_idle_io(ub, timeout_ms);
3342+
3343+
put_disk:
3344+
ublk_put_disk(disk);
3345+
unlock:
3346+
mutex_unlock(&ub->mutex);
3347+
3348+
/* Cancel pending uring_cmd */
3349+
if (!ret)
3350+
ublk_cancel_dev(ub);
3351+
return ret;
3352+
}
3353+
32363354
/*
32373355
* All control commands are sent via /dev/ublk-control, so we have to check
32383356
* the destination device's permission
@@ -3319,6 +3437,7 @@ static int ublk_ctrl_uring_cmd_permission(struct ublk_device *ub,
33193437
case UBLK_CMD_START_USER_RECOVERY:
33203438
case UBLK_CMD_END_USER_RECOVERY:
33213439
case UBLK_CMD_UPDATE_SIZE:
3440+
case UBLK_CMD_QUIESCE_DEV:
33223441
mask = MAY_READ | MAY_WRITE;
33233442
break;
33243443
default:
@@ -3414,6 +3533,9 @@ static int ublk_ctrl_uring_cmd(struct io_uring_cmd *cmd,
34143533
ublk_ctrl_set_size(ub, header);
34153534
ret = 0;
34163535
break;
3536+
case UBLK_CMD_QUIESCE_DEV:
3537+
ret = ublk_ctrl_quiesce_dev(ub, header);
3538+
break;
34173539
default:
34183540
ret = -EOPNOTSUPP;
34193541
break;

include/uapi/linux/ublk_cmd.h

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,8 @@
5353
_IOR('u', 0x14, struct ublksrv_ctrl_cmd)
5454
#define UBLK_U_CMD_UPDATE_SIZE \
5555
_IOWR('u', 0x15, struct ublksrv_ctrl_cmd)
56+
#define UBLK_U_CMD_QUIESCE_DEV \
57+
_IOWR('u', 0x16, struct ublksrv_ctrl_cmd)
5658

5759
/*
5860
* 64bits are enough now, and it should be easy to extend in case of
@@ -253,6 +255,23 @@
253255
*/
254256
#define UBLK_F_AUTO_BUF_REG (1ULL << 11)
255257

258+
/*
 * Feature flag enabling control command `UBLK_U_CMD_QUIESCE_DEV`, which
 * quiesces the device. The device state finally transitions to
 * `UBLK_S_DEV_QUIESCED` or `UBLK_S_DEV_FAIL_IO`; this needs ublk server
 * cooperation for handling `UBLK_IO_RES_ABORT` correctly.
 *
 * The typical use case is upgrading the ublk server application while
 * keeping the ublk block device persistent during the upgrade.
 *
 * This feature is only available when UBLK_F_USER_RECOVERY is enabled.
 *
 * Note: the command takes a timeout in milliseconds as its control
 * command parameter (zero means wait forever). It returns -EBUSY if all
 * IO commands are being handled by the ublk server and none completes
 * within that period.
 */
273+
#define UBLK_F_QUIESCE (1ULL << 12)
274+
256275
/* device state */
257276
#define UBLK_S_DEV_DEAD 0
258277
#define UBLK_S_DEV_LIVE 1

0 commit comments

Comments
 (0)