Skip to content

Commit a1572e2

Browse files
committed
libsa/zfs: further improve handling of stale labels
Fix two problems with 6dd0803. First problem is that situation when newer label was read before stale one, was handled differently to reverse order case. Second problem is that vdev_free() would free the fully initialized leaf vdev that carried stale label. In a case when vdev carries a stale label, but is still referenced by a different label with new a configuration, we don't want to free it, but rather insert it into the new configuration. o Provide a helper function nvlist_find_vdev_guid() that checks presence of certain GUID in a label. o In top level vdev store the GUID of vdev used to instantiate top vdev. o Cover all possible cases in the block in vdev_probe() where we encounter a known configuration. Make the diagnostic print more informative and looking same regardless of probe order. Make this whole block easier to read reducing one level of indentation for a price of a single comparison at runtime. Reviewed by: mav, imp Differential Revision: https://reviews.freebsd.org/D51913 Fixes: 6dd0803
1 parent 9dbd8ee commit a1572e2

File tree

2 files changed

+113
-21
lines changed

2 files changed

+113
-21
lines changed

stand/libsa/zfs/zfsimpl.c

Lines changed: 111 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -833,6 +833,14 @@ vdev_replacing_read(vdev_t *vdev, const blkptr_t *bp, void *buf,
833833
return (kid->v_read(kid, bp, buf, offset, bytes));
834834
}
835835

836+
/*
837+
* List of vdevs that were fully initialized from their own label, but later a
838+
* newer label was found that obsoleted the stale label, freeing its
839+
* configuration tree. We keep those vdevs around, since a new configuration
840+
* may include them.
841+
*/
842+
static vdev_list_t orphans = STAILQ_HEAD_INITIALIZER(orphans);
843+
836844
static vdev_t *
837845
vdev_find(vdev_list_t *list, uint64_t guid)
838846
{
@@ -854,6 +862,11 @@ vdev_create(uint64_t guid, vdev_read_t *_read)
854862
vdev_t *vdev;
855863
vdev_indirect_config_t *vic;
856864

865+
if ((vdev = vdev_find(&orphans, guid))) {
866+
STAILQ_REMOVE(&orphans, vdev, vdev, v_childlink);
867+
return (vdev);
868+
}
869+
857870
vdev = calloc(1, sizeof(vdev_t));
858871
if (vdev != NULL) {
859872
STAILQ_INIT(&vdev->v_children);
@@ -1101,8 +1114,8 @@ vdev_insert(vdev_t *top_vdev, vdev_t *vdev)
11011114
}
11021115

11031116
static int
1104-
vdev_from_nvlist(spa_t *spa, uint64_t top_guid, uint64_t txg,
1105-
const nvlist_t *nvlist)
1117+
vdev_from_nvlist(spa_t *spa, uint64_t top_guid, uint64_t label_guid,
1118+
uint64_t txg, const nvlist_t *nvlist)
11061119
{
11071120
vdev_t *top_vdev, *vdev;
11081121
nvlist_t **kids = NULL;
@@ -1116,6 +1129,7 @@ vdev_from_nvlist(spa_t *spa, uint64_t top_guid, uint64_t txg,
11161129
return (rc);
11171130
top_vdev->v_spa = spa;
11181131
top_vdev->v_top = top_vdev;
1132+
top_vdev->v_label = label_guid;
11191133
top_vdev->v_txg = txg;
11201134
(void )vdev_insert(spa->spa_root_vdev, top_vdev);
11211135
}
@@ -1160,12 +1174,14 @@ vdev_from_nvlist(spa_t *spa, uint64_t top_guid, uint64_t txg,
11601174
static int
11611175
vdev_init_from_label(spa_t *spa, const nvlist_t *nvlist)
11621176
{
1163-
uint64_t top_guid, txg;
1177+
uint64_t top_guid, label_guid, txg;
11641178
nvlist_t *vdevs;
11651179
int rc;
11661180

11671181
if (nvlist_find(nvlist, ZPOOL_CONFIG_TOP_GUID, DATA_TYPE_UINT64,
11681182
NULL, &top_guid, NULL) ||
1183+
nvlist_find(nvlist, ZPOOL_CONFIG_GUID, DATA_TYPE_UINT64,
1184+
NULL, &label_guid, NULL) != 0 ||
11691185
nvlist_find(nvlist, ZPOOL_CONFIG_POOL_TXG, DATA_TYPE_UINT64,
11701186
NULL, &txg, NULL) != 0 ||
11711187
nvlist_find(nvlist, ZPOOL_CONFIG_VDEV_TREE, DATA_TYPE_NVLIST,
@@ -1174,7 +1190,7 @@ vdev_init_from_label(spa_t *spa, const nvlist_t *nvlist)
11741190
return (ENOENT);
11751191
}
11761192

1177-
rc = vdev_from_nvlist(spa, top_guid, txg, vdevs);
1193+
rc = vdev_from_nvlist(spa, top_guid, label_guid, txg, vdevs);
11781194
nvlist_destroy(vdevs);
11791195
return (rc);
11801196
}
@@ -1271,7 +1287,10 @@ vdev_free(struct vdev *vdev)
12711287

12721288
STAILQ_FOREACH_SAFE(kid, &vdev->v_children, v_childlink, safe)
12731289
vdev_free(kid);
1274-
free(vdev);
1290+
if (vdev->v_phys_read != NULL)
1291+
STAILQ_INSERT_HEAD(&orphans, vdev, v_childlink);
1292+
else
1293+
free(vdev);
12751294
}
12761295

12771296
static int
@@ -1323,7 +1342,7 @@ vdev_init_from_nvlist(spa_t *spa, const nvlist_t *nvlist)
13231342
* XXXGL: how can this happen?
13241343
*/
13251344
if (vdev == NULL)
1326-
rc = vdev_from_nvlist(spa, guid, 0, kids[i]);
1345+
rc = vdev_from_nvlist(spa, guid, 0, 0, kids[i]);
13271346
else
13281347
rc = vdev_update_from_nvlist(spa->spa_root_vdev, guid,
13291348
kids[i]);
@@ -1344,6 +1363,53 @@ vdev_init_from_nvlist(spa_t *spa, const nvlist_t *nvlist)
13441363
return (rc);
13451364
}
13461365

1366+
static bool
1367+
nvlist_find_child_guid(const nvlist_t *nvlist, uint64_t guid)
1368+
{
1369+
nvlist_t **kids = NULL;
1370+
int nkids, i;
1371+
bool rv = false;
1372+
1373+
if (nvlist_find(nvlist, ZPOOL_CONFIG_CHILDREN, DATA_TYPE_NVLIST_ARRAY,
1374+
&nkids, &kids, NULL) != 0)
1375+
nkids = 0;
1376+
1377+
for (i = 0; i < nkids; i++) {
1378+
uint64_t kid_guid;
1379+
1380+
if (nvlist_find(kids[i], ZPOOL_CONFIG_GUID, DATA_TYPE_UINT64,
1381+
NULL, &kid_guid, NULL) != 0)
1382+
break;
1383+
if (kid_guid == guid)
1384+
rv = true;
1385+
else
1386+
rv = nvlist_find_child_guid(kids[i], guid);
1387+
if (rv)
1388+
break;
1389+
}
1390+
1391+
for (i = 0; i < nkids; i++)
1392+
nvlist_destroy(kids[i]);
1393+
free(kids);
1394+
1395+
return (rv);
1396+
}
1397+
1398+
static bool
1399+
nvlist_find_vdev_guid(const nvlist_t *nvlist, uint64_t guid)
1400+
{
1401+
nvlist_t *vdevs;
1402+
bool rv;
1403+
1404+
if (nvlist_find(nvlist, ZPOOL_CONFIG_VDEV_TREE, DATA_TYPE_NVLIST, NULL,
1405+
&vdevs, NULL) != 0)
1406+
return (false);
1407+
rv = nvlist_find_child_guid(vdevs, guid);
1408+
nvlist_destroy(vdevs);
1409+
1410+
return (rv);
1411+
}
1412+
13471413
static spa_t *
13481414
spa_find_by_guid(uint64_t guid)
13491415
{
@@ -2012,7 +2078,7 @@ vdev_probe(vdev_phys_read_t *_read, vdev_phys_write_t *_write, void *priv,
20122078
{
20132079
vdev_t vtmp;
20142080
spa_t *spa;
2015-
vdev_t *vdev;
2081+
vdev_t *vdev, *top;
20162082
nvlist_t *nvl;
20172083
uint64_t val;
20182084
uint64_t guid, pool_guid, top_guid, txg;
@@ -2109,22 +2175,47 @@ vdev_probe(vdev_phys_read_t *_read, vdev_phys_write_t *_write, void *priv,
21092175
nvlist_destroy(nvl);
21102176
return (ENOMEM);
21112177
}
2112-
} else {
2113-
struct vdev *kid;
2114-
2115-
STAILQ_FOREACH(kid, &spa->spa_root_vdev->v_children,
2116-
v_childlink)
2117-
if (kid->v_guid == top_guid && kid->v_txg < txg) {
2118-
printf("ZFS: pool %s vdev %s ignoring stale "
2119-
"label from txg 0x%jx, using 0x%jx@0x%jx\n",
2120-
spa->spa_name, kid->v_name,
2121-
kid->v_txg, guid, txg);
2178+
}
2179+
2180+
/*
2181+
* Check if configuration is already known. If configuration is known
2182+
* and txg numbers don't match, we got 2x2 scenarios here. First, is
2183+
* the label being read right now _newer_ than the one read before.
2184+
* Second, is the vdev that provided the stale label _present_ in the
2185+
* newer configuration. If neither is true, we completely ignore the
2186+
* label.
2187+
*/
2188+
STAILQ_FOREACH(top, &spa->spa_root_vdev->v_children, v_childlink)
2189+
if (top->v_guid == top_guid) {
2190+
bool newer, present;
2191+
2192+
if (top->v_txg == txg)
2193+
break;
2194+
newer = (top->v_txg < txg);
2195+
present = newer ?
2196+
nvlist_find_vdev_guid(nvl, top->v_label) :
2197+
(vdev_find(&top->v_children, guid) != NULL);
2198+
printf("ZFS: pool %s vdev %s %s stale label from "
2199+
"0x%jx@0x%jx, %s 0x%jx@0x%jx\n",
2200+
spa->spa_name, top->v_name,
2201+
present ? "using" : "ignoring",
2202+
newer ? top->v_label : guid,
2203+
newer ? top->v_txg : txg,
2204+
present ? "referred by" : "using",
2205+
newer ? guid : top->v_label,
2206+
newer ? txg : top->v_txg);
2207+
if (newer) {
21222208
STAILQ_REMOVE(&spa->spa_root_vdev->v_children,
2123-
kid, vdev, v_childlink);
2124-
vdev_free(kid);
2209+
top, vdev, v_childlink);
2210+
vdev_free(top);
21252211
break;
2212+
} else if (present) {
2213+
break;
2214+
} else {
2215+
nvlist_destroy(nvl);
2216+
return (EIO);
21262217
}
2127-
}
2218+
}
21282219

21292220
/*
21302221
* Get the vdev tree and create our in-core copy of it.

sys/cddl/boot/zfs/zfsimpl.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2024,7 +2024,8 @@ typedef struct vdev {
20242024
vdev_list_t v_children; /* children of this vdev */
20252025
const char *v_name; /* vdev name */
20262026
uint64_t v_guid; /* vdev guid */
2027-
uint64_t v_txg; /* most recent transaction */
2027+
uint64_t v_label; /* label instantiated from (top vdev) */
2028+
uint64_t v_txg; /* most recent transaction (top vdev) */
20282029
uint64_t v_id; /* index in parent */
20292030
uint64_t v_psize; /* physical device capacity */
20302031
int v_ashift; /* offset to block shift */

0 commit comments

Comments
 (0)