Skip to content

Commit c5c5eb2

Browse files
Ming Leiaxboe
authored andcommitted
ublk: avoid ublk_io_release() called after ublk char dev is closed
When running test_stress_04.sh, the following warning is triggered: WARNING: CPU: 1 PID: 135 at drivers/block/ublk_drv.c:1933 ublk_ch_release+0x423/0x4b0 [ublk_drv] This happens when the daemon is abruptly killed: - some references may still be held, because registering IO buffer doesn't grab ublk char device reference OR - io->task_registered_buffers won't be cleared because io buffer is released from non-daemon context For zero-copy and auto buffer register modes, I/O reference crosses syscalls, so IO reference may not be dropped naturally when ublk server is killed abruptly. However, when releasing io_uring context, it is guaranteed that the reference is dropped finally, see io_sqe_buffers_unregister() from io_ring_ctx_free(). Fix this by adding ublk_drain_io_references() that: - Waits for active I/O references dropped in async way by scheduling work function, for avoiding ublk dev and io_uring file's release dependency - Reinitializes io->ref and io->task_registered_buffers to clean state This ensures the reference count state is clean when ublk_queue_reinit() is called, preventing the warning and potential use-after-free. Fixes: 1f6540e ("ublk: zc register/unregister bvec") Fixes: 1ceeedb ("ublk: optimize UBLK_IO_UNREGISTER_IO_BUF on daemon task") Fixes: 8a8fe42 ("ublk: optimize UBLK_IO_REGISTER_IO_BUF on daemon task") Signed-off-by: Ming Lei <[email protected]> Link: https://lore.kernel.org/r/[email protected] Signed-off-by: Jens Axboe <[email protected]>
1 parent e3ef944 commit c5c5eb2

File tree

1 file changed

+70
-2
lines changed

1 file changed

+70
-2
lines changed

drivers/block/ublk_drv.c

Lines changed: 70 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -239,6 +239,7 @@ struct ublk_device {
239239
struct mutex cancel_mutex;
240240
bool canceling;
241241
pid_t ublksrv_tgid;
242+
struct delayed_work exit_work;
242243
};
243244

244245
/* header of ublk_params */
@@ -1595,12 +1596,62 @@ static void ublk_set_canceling(struct ublk_device *ub, bool canceling)
15951596
ublk_get_queue(ub, i)->canceling = canceling;
15961597
}
15971598

1598-
static int ublk_ch_release(struct inode *inode, struct file *filp)
1599+
static bool ublk_check_and_reset_active_ref(struct ublk_device *ub)
15991600
{
1600-
struct ublk_device *ub = filp->private_data;
1601+
int i, j;
1602+
1603+
if (!(ub->dev_info.flags & (UBLK_F_SUPPORT_ZERO_COPY |
1604+
UBLK_F_AUTO_BUF_REG)))
1605+
return false;
1606+
1607+
for (i = 0; i < ub->dev_info.nr_hw_queues; i++) {
1608+
struct ublk_queue *ubq = ublk_get_queue(ub, i);
1609+
1610+
for (j = 0; j < ubq->q_depth; j++) {
1611+
struct ublk_io *io = &ubq->ios[j];
1612+
unsigned int refs = refcount_read(&io->ref) +
1613+
io->task_registered_buffers;
1614+
1615+
/*
1616+
* UBLK_REFCOUNT_INIT or zero means no active
1617+
* reference
1618+
*/
1619+
if (refs != UBLK_REFCOUNT_INIT && refs != 0)
1620+
return true;
1621+
1622+
/* reset to zero if the io hasn't active references */
1623+
refcount_set(&io->ref, 0);
1624+
io->task_registered_buffers = 0;
1625+
}
1626+
}
1627+
return false;
1628+
}
1629+
1630+
static void ublk_ch_release_work_fn(struct work_struct *work)
1631+
{
1632+
struct ublk_device *ub =
1633+
container_of(work, struct ublk_device, exit_work.work);
16011634
struct gendisk *disk;
16021635
int i;
16031636

1637+
/*
1638+
* For zero-copy and auto buffer register modes, I/O references
1639+
* might not be dropped naturally when the daemon is killed, but
1640+
* io_uring guarantees that registered bvec kernel buffers are
1641+
* unregistered finally when freeing io_uring context, then the
1642+
* active references are dropped.
1643+
*
1644+
* Wait until active references are dropped for avoiding use-after-free
1645+
*
1646+
* registered buffer may be unregistered in io_ring's release hander,
1647+
* so have to wait by scheduling work function for avoiding the two
1648+
* file release dependency.
1649+
*/
1650+
if (ublk_check_and_reset_active_ref(ub)) {
1651+
schedule_delayed_work(&ub->exit_work, 1);
1652+
return;
1653+
}
1654+
16041655
/*
16051656
* disk isn't attached yet, either device isn't live, or it has
16061657
* been removed already, so we needn't to do anything
@@ -1673,6 +1724,23 @@ static int ublk_ch_release(struct inode *inode, struct file *filp)
16731724
ublk_reset_ch_dev(ub);
16741725
out:
16751726
clear_bit(UB_STATE_OPEN, &ub->state);
1727+
1728+
/* put the reference grabbed in ublk_ch_release() */
1729+
ublk_put_device(ub);
1730+
}
1731+
1732+
static int ublk_ch_release(struct inode *inode, struct file *filp)
1733+
{
1734+
struct ublk_device *ub = filp->private_data;
1735+
1736+
/*
1737+
* Grab ublk device reference, so it won't be gone until we are
1738+
* really released from work function.
1739+
*/
1740+
ublk_get_device(ub);
1741+
1742+
INIT_DELAYED_WORK(&ub->exit_work, ublk_ch_release_work_fn);
1743+
schedule_delayed_work(&ub->exit_work, 0);
16761744
return 0;
16771745
}
16781746

0 commit comments

Comments
 (0)