Skip to content

Commit d869da9

Browse files
chucklever authored and amschuma-ntap committed
nfs/blocklayout: Fix premature PR key unregistration
During generic/069 runs with pNFS SCSI layouts, the NFS client emits the following in the system journal:

kernel: pNFS: failed to open device /dev/disk/by-id/dm-uuid-mpath-0x6001405e3366f045b7949eb8e4540b51 (-2)
kernel: pNFS: using block device sdb (reservation key 0x666b60901e7b26b3)
kernel: pNFS: failed to open device /dev/disk/by-id/dm-uuid-mpath-0x6001405e3366f045b7949eb8e4540b51 (-2)
kernel: pNFS: using block device sdb (reservation key 0x666b60901e7b26b3)
kernel: sd 6:0:0:1: reservation conflict
kernel: sd 6:0:0:1: [sdb] tag#16 FAILED Result: hostbyte=DID_OK driverbyte=DRIVER_OK cmd_age=0s
kernel: sd 6:0:0:1: [sdb] tag#16 CDB: Write(10) 2a 00 00 00 00 50 00 00 08 00
kernel: reservation conflict error, dev sdb, sector 80 op 0x1:(WRITE) flags 0x0 phys_seg 1 prio class 2
kernel: sd 6:0:0:1: reservation conflict
kernel: sd 6:0:0:1: reservation conflict
kernel: sd 6:0:0:1: [sdb] tag#18 FAILED Result: hostbyte=DID_OK driverbyte=DRIVER_OK cmd_age=0s
kernel: sd 6:0:0:1: [sdb] tag#17 FAILED Result: hostbyte=DID_OK driverbyte=DRIVER_OK cmd_age=0s
kernel: sd 6:0:0:1: [sdb] tag#18 CDB: Write(10) 2a 00 00 00 00 60 00 00 08 00
kernel: sd 6:0:0:1: [sdb] tag#17 CDB: Write(10) 2a 00 00 00 00 58 00 00 08 00
kernel: reservation conflict error, dev sdb, sector 96 op 0x1:(WRITE) flags 0x0 phys_seg 1 prio class 0
kernel: reservation conflict error, dev sdb, sector 88 op 0x1:(WRITE) flags 0x0 phys_seg 1 prio class 0
systemd[1]: fstests-generic-069.scope: Deactivated successfully.
systemd[1]: fstests-generic-069.scope: Consumed 5.092s CPU time.
systemd[1]: media-test.mount: Deactivated successfully.
systemd[1]: media-scratch.mount: Deactivated successfully.
kernel: sd 6:0:0:1: reservation conflict
kernel: failed to unregister PR key.

This appears to be due to a race.
bl_alloc_lseg() calls this:

561 static struct nfs4_deviceid_node *
562 bl_find_get_deviceid(struct nfs_server *server,
563                 const struct nfs4_deviceid *id, const struct cred *cred,
564                 gfp_t gfp_mask)
565 {
566         struct nfs4_deviceid_node *node;
567         unsigned long start, end;
568
569 retry:
570         node = nfs4_find_get_deviceid(server, id, cred, gfp_mask);
571         if (!node)
572                 return ERR_PTR(-ENODEV);

nfs4_find_get_deviceid() does a lookup without the spin lock first. If it can't find a matching deviceid, it creates a new device_info (which calls bl_alloc_deviceid_node, and that registers the device's PR key).

Then it takes the nfs4_deviceid_lock and looks up the deviceid again. If it finds it this time, bl_find_get_deviceid() frees the spare (new) device_info, which unregisters the PR key for the same device.

Any subsequent I/O from this client on that device gets EBADE.

The umount later unregisters the device's PR key again.

To prevent this problem, register the PR key after the deviceid_node lookup.

Signed-off-by: Christoph Hellwig <[email protected]>
Signed-off-by: Chuck Lever <[email protected]>
Reviewed-by: Christoph Hellwig <[email protected]>
Reviewed-by: Benjamin Coddington <[email protected]>
Signed-off-by: Anna Schumaker <[email protected]>
1 parent 5468fc8 commit d869da9

File tree

3 files changed

+94
-31
lines changed

3 files changed

+94
-31
lines changed

fs/nfs/blocklayout/blocklayout.c

Lines changed: 16 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -564,25 +564,32 @@ bl_find_get_deviceid(struct nfs_server *server,
564564
gfp_t gfp_mask)
565565
{
566566
struct nfs4_deviceid_node *node;
567-
unsigned long start, end;
567+
int err = -ENODEV;
568568

569569
retry:
570570
node = nfs4_find_get_deviceid(server, id, cred, gfp_mask);
571571
if (!node)
572572
return ERR_PTR(-ENODEV);
573573

574-
if (test_bit(NFS_DEVICEID_UNAVAILABLE, &node->flags) == 0)
575-
return node;
574+
if (test_bit(NFS_DEVICEID_UNAVAILABLE, &node->flags)) {
575+
unsigned long end = jiffies;
576+
unsigned long start = end - PNFS_DEVICE_RETRY_TIMEOUT;
576577

577-
end = jiffies;
578-
start = end - PNFS_DEVICE_RETRY_TIMEOUT;
579-
if (!time_in_range(node->timestamp_unavailable, start, end)) {
580-
nfs4_delete_deviceid(node->ld, node->nfs_client, id);
581-
goto retry;
578+
if (!time_in_range(node->timestamp_unavailable, start, end)) {
579+
nfs4_delete_deviceid(node->ld, node->nfs_client, id);
580+
goto retry;
581+
}
582+
goto out_put;
582583
}
583584

585+
if (!bl_register_dev(container_of(node, struct pnfs_block_dev, node)))
586+
goto out_put;
587+
588+
return node;
589+
590+
out_put:
584591
nfs4_put_deviceid_node(node);
585-
return ERR_PTR(-ENODEV);
592+
return ERR_PTR(err);
586593
}
587594

588595
static int

fs/nfs/blocklayout/blocklayout.h

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -104,20 +104,26 @@ struct pnfs_block_dev {
104104
u64 start;
105105
u64 len;
106106

107+
enum pnfs_block_volume_type type;
107108
u32 nr_children;
108109
struct pnfs_block_dev *children;
109110
u64 chunk_size;
110111

111112
struct file *bdev_file;
112113
u64 disk_offset;
114+
unsigned long flags;
113115

114116
u64 pr_key;
115-
bool pr_registered;
116117

117118
bool (*map)(struct pnfs_block_dev *dev, u64 offset,
118119
struct pnfs_block_dev_map *map);
119120
};
120121

122+
/* pnfs_block_dev flag bits */
123+
enum {
124+
PNFS_BDEV_REGISTERED = 0,
125+
};
126+
121127
/* sector_t fields are all in 512-byte sectors */
122128
struct pnfs_block_extent {
123129
union {
@@ -172,6 +178,7 @@ struct bl_msg_hdr {
172178
#define BL_DEVICE_REQUEST_ERR 0x2 /* User level process fails */
173179

174180
/* dev.c */
181+
bool bl_register_dev(struct pnfs_block_dev *d);
175182
struct nfs4_deviceid_node *bl_alloc_deviceid_node(struct nfs_server *server,
176183
struct pnfs_device *pdev, gfp_t gfp_mask);
177184
void bl_free_deviceid_node(struct nfs4_deviceid_node *d);

fs/nfs/blocklayout/dev.c

Lines changed: 70 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -13,27 +13,82 @@
1313

1414
#define NFSDBG_FACILITY NFSDBG_PNFS_LD
1515

16+
static void bl_unregister_scsi(struct pnfs_block_dev *dev)
17+
{
18+
struct block_device *bdev = file_bdev(dev->bdev_file);
19+
const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops;
20+
21+
if (!test_and_clear_bit(PNFS_BDEV_REGISTERED, &dev->flags))
22+
return;
23+
24+
if (ops->pr_register(bdev, dev->pr_key, 0, false))
25+
pr_err("failed to unregister PR key.\n");
26+
}
27+
28+
static bool bl_register_scsi(struct pnfs_block_dev *dev)
29+
{
30+
struct block_device *bdev = file_bdev(dev->bdev_file);
31+
const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops;
32+
int status;
33+
34+
if (test_and_set_bit(PNFS_BDEV_REGISTERED, &dev->flags))
35+
return true;
36+
37+
status = ops->pr_register(bdev, 0, dev->pr_key, true);
38+
if (status) {
39+
pr_err("pNFS: failed to register key for block device %s.",
40+
bdev->bd_disk->disk_name);
41+
return false;
42+
}
43+
return true;
44+
}
45+
46+
static void bl_unregister_dev(struct pnfs_block_dev *dev)
47+
{
48+
u32 i;
49+
50+
if (dev->nr_children) {
51+
for (i = 0; i < dev->nr_children; i++)
52+
bl_unregister_dev(&dev->children[i]);
53+
return;
54+
}
55+
56+
if (dev->type == PNFS_BLOCK_VOLUME_SCSI)
57+
bl_unregister_scsi(dev);
58+
}
59+
60+
bool bl_register_dev(struct pnfs_block_dev *dev)
61+
{
62+
u32 i;
63+
64+
if (dev->nr_children) {
65+
for (i = 0; i < dev->nr_children; i++) {
66+
if (!bl_register_dev(&dev->children[i])) {
67+
while (i > 0)
68+
bl_unregister_dev(&dev->children[--i]);
69+
return false;
70+
}
71+
}
72+
return true;
73+
}
74+
75+
if (dev->type == PNFS_BLOCK_VOLUME_SCSI)
76+
return bl_register_scsi(dev);
77+
return true;
78+
}
79+
1680
static void
1781
bl_free_device(struct pnfs_block_dev *dev)
1882
{
83+
bl_unregister_dev(dev);
84+
1985
if (dev->nr_children) {
2086
int i;
2187

2288
for (i = 0; i < dev->nr_children; i++)
2389
bl_free_device(&dev->children[i]);
2490
kfree(dev->children);
2591
} else {
26-
if (dev->pr_registered) {
27-
const struct pr_ops *ops =
28-
file_bdev(dev->bdev_file)->bd_disk->fops->pr_ops;
29-
int error;
30-
31-
error = ops->pr_register(file_bdev(dev->bdev_file),
32-
dev->pr_key, 0, false);
33-
if (error)
34-
pr_err("failed to unregister PR key.\n");
35-
}
36-
3792
if (dev->bdev_file)
3893
fput(dev->bdev_file);
3994
}
@@ -365,14 +420,6 @@ bl_parse_scsi(struct nfs_server *server, struct pnfs_block_dev *d,
365420
goto out_blkdev_put;
366421
}
367422

368-
error = ops->pr_register(file_bdev(d->bdev_file), 0, d->pr_key, true);
369-
if (error) {
370-
pr_err("pNFS: failed to register key for block device %s.",
371-
file_bdev(d->bdev_file)->bd_disk->disk_name);
372-
goto out_blkdev_put;
373-
}
374-
375-
d->pr_registered = true;
376423
return 0;
377424

378425
out_blkdev_put:
@@ -458,7 +505,9 @@ static int
458505
bl_parse_deviceid(struct nfs_server *server, struct pnfs_block_dev *d,
459506
struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask)
460507
{
461-
switch (volumes[idx].type) {
508+
d->type = volumes[idx].type;
509+
510+
switch (d->type) {
462511
case PNFS_BLOCK_VOLUME_SIMPLE:
463512
return bl_parse_simple(server, d, volumes, idx, gfp_mask);
464513
case PNFS_BLOCK_VOLUME_SLICE:
@@ -470,7 +519,7 @@ bl_parse_deviceid(struct nfs_server *server, struct pnfs_block_dev *d,
470519
case PNFS_BLOCK_VOLUME_SCSI:
471520
return bl_parse_scsi(server, d, volumes, idx, gfp_mask);
472521
default:
473-
dprintk("unsupported volume type: %d\n", volumes[idx].type);
522+
dprintk("unsupported volume type: %d\n", d->type);
474523
return -EIO;
475524
}
476525
}

0 commit comments

Comments
 (0)