Skip to content

Commit 9dbd8ee

Browse files
committed
libsa/zfs: refactor vdev tree for better resiliency against stale disks
Before this change in vdev_insert() we would avoid inserting a duplicate vdev to the list of children, however this duplicate being unlinked from the parent is still stored on the global list with initialized v_guid. Such leaked duplicate can later be returned by vdev_find(). After 6dd0803 such leaked vdev may be freed or pointing to a freed parent, which leads to a loader crash. Note that the leak problem was there before 6dd0803. First, in vdev_insert() free conflicting vdev and return the existing one. Update callers accordingly. There is only one caller that actually may encounter this condition. Second, eliminate global list of vdevs and make vdev_find() to work recursively on the tree that a caller must provide. Of course, a chance of GUID collision between members of different pools is extremely low. The main motivation here is just to increase code robustness and fully isolate the data structures of different pools being tasted by the loader, and make easier debugging of bugs like the one being fixed. Reviewed by: mav, imp Differential Revision: https://reviews.freebsd.org/D51912 Fixes: 6dd0803
1 parent 8ef5016 commit 9dbd8ee

File tree

2 files changed

+22
-29
lines changed

2 files changed

+22
-29
lines changed

stand/libsa/zfs/zfsimpl.c

Lines changed: 22 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -106,11 +106,6 @@ typedef struct indirect_vsd {
106106
list_t iv_splits; /* list of indirect_split_t's */
107107
} indirect_vsd_t;
108108

109-
/*
110-
* List of all vdevs, chained through v_alllink.
111-
*/
112-
static vdev_list_t zfs_vdevs;
113-
114109
/*
115110
* List of supported read-incompatible ZFS features. Do not add here features
116111
* marked as ZFEATURE_FLAG_READONLY_COMPAT, they are irrelevant for read-only!
@@ -167,7 +162,6 @@ vdev_indirect_mapping_entry_phys_t *
167162
static void
168163
zfs_init(void)
169164
{
170-
STAILQ_INIT(&zfs_vdevs);
171165
STAILQ_INIT(&zfs_pools);
172166

173167
dnode_cache_buf = malloc(SPA_MAXBLOCKSIZE);
@@ -840,15 +834,18 @@ vdev_replacing_read(vdev_t *vdev, const blkptr_t *bp, void *buf,
840834
}
841835

842836
static vdev_t *
843-
vdev_find(uint64_t guid)
837+
vdev_find(vdev_list_t *list, uint64_t guid)
844838
{
845-
vdev_t *vdev;
839+
vdev_t *vdev, *safe;
846840

847-
STAILQ_FOREACH(vdev, &zfs_vdevs, v_alllink)
841+
STAILQ_FOREACH_SAFE(vdev, list, v_childlink, safe) {
848842
if (vdev->v_guid == guid)
849843
return (vdev);
844+
if ((vdev = vdev_find(&vdev->v_children, guid)) != NULL)
845+
return (vdev);
846+
}
850847

851-
return (0);
848+
return (NULL);
852849
}
853850

854851
static vdev_t *
@@ -871,7 +868,6 @@ vdev_create(uint64_t guid, vdev_read_t *_read)
871868
if (_read != NULL) {
872869
vic = &vdev->vdev_indirect_config;
873870
vic->vic_prev_indirect_vdev = UINT64_MAX;
874-
STAILQ_INSERT_TAIL(&zfs_vdevs, vdev, v_alllink);
875871
}
876872
}
877873

@@ -1069,7 +1065,7 @@ vdev_child_count(vdev_t *vdev)
10691065
/*
10701066
* Insert vdev into top_vdev children list. List is ordered by v_id.
10711067
*/
1072-
static void
1068+
static vdev_t *
10731069
vdev_insert(vdev_t *top_vdev, vdev_t *vdev)
10741070
{
10751071
vdev_t *previous;
@@ -1091,7 +1087,8 @@ vdev_insert(vdev_t *top_vdev, vdev_t *vdev)
10911087
* This vdev was configured from label config,
10921088
* do not insert duplicate.
10931089
*/
1094-
return;
1090+
free(vdev);
1091+
return (previous);
10951092
} else {
10961093
STAILQ_INSERT_AFTER(&top_vdev->v_children, previous, vdev,
10971094
v_childlink);
@@ -1100,6 +1097,7 @@ vdev_insert(vdev_t *top_vdev, vdev_t *vdev)
11001097
count = vdev_child_count(top_vdev);
11011098
if (top_vdev->v_nchildren < count)
11021099
top_vdev->v_nchildren = count;
1100+
return (vdev);
11031101
}
11041102

11051103
static int
@@ -1111,15 +1109,15 @@ vdev_from_nvlist(spa_t *spa, uint64_t top_guid, uint64_t txg,
11111109
int rc, nkids;
11121110

11131111
/* Get top vdev. */
1114-
top_vdev = vdev_find(top_guid);
1112+
top_vdev = vdev_find(&spa->spa_root_vdev->v_children, top_guid);
11151113
if (top_vdev == NULL) {
11161114
rc = vdev_init(top_guid, nvlist, &top_vdev);
11171115
if (rc != 0)
11181116
return (rc);
11191117
top_vdev->v_spa = spa;
11201118
top_vdev->v_top = top_vdev;
11211119
top_vdev->v_txg = txg;
1122-
vdev_insert(spa->spa_root_vdev, top_vdev);
1120+
(void )vdev_insert(spa->spa_root_vdev, top_vdev);
11231121
}
11241122

11251123
/* Add children if there are any. */
@@ -1140,7 +1138,7 @@ vdev_from_nvlist(spa_t *spa, uint64_t top_guid, uint64_t txg,
11401138

11411139
vdev->v_spa = spa;
11421140
vdev->v_top = top_vdev;
1143-
vdev_insert(top_vdev, vdev);
1141+
vdev = vdev_insert(top_vdev, vdev);
11441142
}
11451143
} else {
11461144
/*
@@ -1227,14 +1225,14 @@ vdev_set_state(vdev_t *vdev)
12271225
}
12281226

12291227
static int
1230-
vdev_update_from_nvlist(uint64_t top_guid, const nvlist_t *nvlist)
1228+
vdev_update_from_nvlist(vdev_t *root, uint64_t top_guid, const nvlist_t *nvlist)
12311229
{
12321230
vdev_t *vdev;
12331231
nvlist_t **kids = NULL;
12341232
int rc, nkids;
12351233

12361234
/* Update top vdev. */
1237-
vdev = vdev_find(top_guid);
1235+
vdev = vdev_find(&root->v_children, top_guid);
12381236
if (vdev != NULL)
12391237
vdev_set_initial_state(vdev, nvlist);
12401238

@@ -1250,7 +1248,7 @@ vdev_update_from_nvlist(uint64_t top_guid, const nvlist_t *nvlist)
12501248
if (rc != 0)
12511249
break;
12521250

1253-
vdev = vdev_find(guid);
1251+
vdev = vdev_find(&root->v_children, guid);
12541252
if (vdev != NULL)
12551253
vdev_set_initial_state(vdev, kids[i]);
12561254
}
@@ -1266,18 +1264,13 @@ vdev_update_from_nvlist(uint64_t top_guid, const nvlist_t *nvlist)
12661264
return (rc);
12671265
}
12681266

1269-
/*
1270-
* Shall not be called on root vdev, that is not linked into zfs_vdevs.
1271-
* See comment in vdev_create().
1272-
*/
12731267
static void
12741268
vdev_free(struct vdev *vdev)
12751269
{
12761270
struct vdev *kid, *safe;
12771271

12781272
STAILQ_FOREACH_SAFE(kid, &vdev->v_children, v_childlink, safe)
12791273
vdev_free(kid);
1280-
STAILQ_REMOVE(&zfs_vdevs, vdev, vdev, v_alllink);
12811274
free(vdev);
12821275
}
12831276

@@ -1324,15 +1317,16 @@ vdev_init_from_nvlist(spa_t *spa, const nvlist_t *nvlist)
13241317
NULL, &guid, NULL);
13251318
if (rc != 0)
13261319
break;
1327-
vdev = vdev_find(guid);
1320+
vdev = vdev_find(&spa->spa_root_vdev->v_children, guid);
13281321
/*
13291322
* Top level vdev is missing, create it.
13301323
* XXXGL: how can this happen?
13311324
*/
13321325
if (vdev == NULL)
13331326
rc = vdev_from_nvlist(spa, guid, 0, kids[i]);
13341327
else
1335-
rc = vdev_update_from_nvlist(guid, kids[i]);
1328+
rc = vdev_update_from_nvlist(spa->spa_root_vdev, guid,
1329+
kids[i]);
13361330
if (rc != 0)
13371331
break;
13381332
}
@@ -2138,7 +2132,7 @@ vdev_probe(vdev_phys_read_t *_read, vdev_phys_write_t *_write, void *priv,
21382132
* be some kind of alias (overlapping slices, dangerously dedicated
21392133
* disks etc).
21402134
*/
2141-
vdev = vdev_find(guid);
2135+
vdev = vdev_find(&spa->spa_root_vdev->v_children, guid);
21422136
/* Has this vdev already been inited? */
21432137
if (vdev && vdev->v_phys_read) {
21442138
nvlist_destroy(nvl);
@@ -2154,7 +2148,7 @@ vdev_probe(vdev_phys_read_t *_read, vdev_phys_write_t *_write, void *priv,
21542148
* We should already have created an incomplete vdev for this
21552149
* vdev. Find it and initialise it with our read proc.
21562150
*/
2157-
vdev = vdev_find(guid);
2151+
vdev = vdev_find(&spa->spa_root_vdev->v_children, guid);
21582152
if (vdev != NULL) {
21592153
vdev->v_phys_read = _read;
21602154
vdev->v_phys_write = _write;

sys/cddl/boot/zfs/zfsimpl.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2021,7 +2021,6 @@ typedef struct vdev_indirect_config {
20212021

20222022
typedef struct vdev {
20232023
STAILQ_ENTRY(vdev) v_childlink; /* link in parent's child list */
2024-
STAILQ_ENTRY(vdev) v_alllink; /* link in global vdev list */
20252024
vdev_list_t v_children; /* children of this vdev */
20262025
const char *v_name; /* vdev name */
20272026
uint64_t v_guid; /* vdev guid */

0 commit comments

Comments
 (0)