Skip to content

Commit 73679f8

Browse files
idryomov authored and gregkh committed
rbd: retrieve and check lock owner twice before blocklisting
commit 5881590 upstream. An attempt to acquire exclusive lock can race with the current lock owner closing the image: 1. lock is held by client123, rbd_lock() returns -EBUSY 2. get_lock_owner_info() returns client123 instance details 3. client123 closes the image, lock is released 4. find_watcher() returns 0 as there is no matching watcher anymore 5. client123 instance gets erroneously blocklisted Particularly impacted is mirror snapshot scheduler in snapshot-based mirroring since it happens to open and close images a lot (images are opened only for as long as it takes to take the next mirror snapshot, the same client instance is used for all images). To reduce the potential for erroneous blocklisting, retrieve the lock owner again after find_watcher() returns 0. If it's still there, make sure it matches the previously detected lock owner. Cc: [email protected] # f38cb9d: rbd: make get_lock_owner_info() return a single locker or NULL Cc: [email protected] # 8ff2c64: rbd: harden get_lock_owner_info() a bit Cc: [email protected] Signed-off-by: Ilya Dryomov <[email protected]> Reviewed-by: Dongsheng Yang <[email protected]> Signed-off-by: Greg Kroah-Hartman <[email protected]>
1 parent 0c0b641 commit 73679f8

File tree

1 file changed

+23
-2
lines changed

1 file changed

+23
-2
lines changed

drivers/block/rbd.c

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3850,6 +3850,15 @@ static void wake_lock_waiters(struct rbd_device *rbd_dev, int result)
38503850
list_splice_tail_init(&rbd_dev->acquiring_list, &rbd_dev->running_list);
38513851
}
38523852

3853+
static bool locker_equal(const struct ceph_locker *lhs,
3854+
const struct ceph_locker *rhs)
3855+
{
3856+
return lhs->id.name.type == rhs->id.name.type &&
3857+
lhs->id.name.num == rhs->id.name.num &&
3858+
!strcmp(lhs->id.cookie, rhs->id.cookie) &&
3859+
ceph_addr_equal_no_type(&lhs->info.addr, &rhs->info.addr);
3860+
}
3861+
38533862
static void free_locker(struct ceph_locker *locker)
38543863
{
38553864
if (locker)
@@ -3970,11 +3979,11 @@ static int find_watcher(struct rbd_device *rbd_dev,
39703979
static int rbd_try_lock(struct rbd_device *rbd_dev)
39713980
{
39723981
struct ceph_client *client = rbd_dev->rbd_client->client;
3973-
struct ceph_locker *locker;
3982+
struct ceph_locker *locker, *refreshed_locker;
39743983
int ret;
39753984

39763985
for (;;) {
3977-
locker = NULL;
3986+
locker = refreshed_locker = NULL;
39783987

39793988
ret = rbd_lock(rbd_dev);
39803989
if (ret != -EBUSY)
@@ -3994,6 +4003,16 @@ static int rbd_try_lock(struct rbd_device *rbd_dev)
39944003
if (ret)
39954004
goto out; /* request lock or error */
39964005

4006+
refreshed_locker = get_lock_owner_info(rbd_dev);
4007+
if (IS_ERR(refreshed_locker)) {
4008+
ret = PTR_ERR(refreshed_locker);
4009+
refreshed_locker = NULL;
4010+
goto out;
4011+
}
4012+
if (!refreshed_locker ||
4013+
!locker_equal(locker, refreshed_locker))
4014+
goto again;
4015+
39974016
rbd_warn(rbd_dev, "breaking header lock owned by %s%llu",
39984017
ENTITY_NAME(locker->id.name));
39994018

@@ -4015,10 +4034,12 @@ static int rbd_try_lock(struct rbd_device *rbd_dev)
40154034
}
40164035

40174036
again:
4037+
free_locker(refreshed_locker);
40184038
free_locker(locker);
40194039
}
40204040

40214041
out:
4042+
free_locker(refreshed_locker);
40224043
free_locker(locker);
40234044
return ret;
40244045
}

0 commit comments

Comments (0)