Skip to content

Commit e7c08ec

Browse files
committed
osd: read osd superblock from both disk and omap and choose the best
Signed-off-by: Igor Fedotov <[email protected]>
1 parent 2e9c723 commit e7c08ec

File tree

1 file changed

+61
-21
lines changed

1 file changed

+61
-21
lines changed

src/osd/OSD.cc

Lines changed: 61 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -4748,38 +4748,78 @@ int OSD::update_crush_device_class()
47484748

47494749
int OSD::read_superblock()
47504750
{
4751+
// Read superblock from both object data and omap metadata
4752+
// for better robustness.
4753+
// Use the most recent superblock replica if the obtained versions
4754+
// mismatch.
47514755
bufferlist bl;
4752-
4756+
47534757
set<string> keys;
47544758
keys.insert(OSD_SUPERBLOCK_OMAP_KEY);
47554759
map<string, bufferlist> vals;
4756-
// Let's read from OMAP first to be able to better handle
4757-
// "recover-after-an-error" case when main OSD volume data
4758-
// is partially corrupted (csums don't match for a bunch of onodes).
4759-
// As a result we might want to set bluestore_ignore_csum_error option which
4760-
// will silence disk read errors.
4761-
// Clearly such a reading from corrupted superblock will miss an error as well
4762-
// and it wouldn't attempt to use still valid OMAP's replica.
4763-
// Hence preferring omap reading over disk one.
4764-
int r = store->omap_get_values(
4760+
OSDSuperblock super_omap;
4761+
OSDSuperblock super_disk;
4762+
int r_omap = store->omap_get_values(
47654763
service.meta_ch, OSD_SUPERBLOCK_GOBJECT, keys, &vals);
4766-
if (r < 0 || vals.size() == 0) {
4767-
dout(10) << __func__ << " attempt reading from disk replica" << dendl;
4768-
4769-
r = store->read(service.meta_ch, OSD_SUPERBLOCK_GOBJECT, 0, 0, bl);
4770-
if (r < 0) {
4771-
return -ENOENT;
4764+
if (r_omap >= 0 && vals.size() > 0) {
4765+
try {
4766+
auto p = vals.begin()->second.cbegin();
4767+
decode(super_omap, p);
4768+
} catch(...) {
4769+
derr << __func__ << " omap replica is corrupted."
4770+
<< dendl;
4771+
r_omap = -EFAULT;
4772+
}
4773+
} else {
4774+
derr << __func__ << " omap replica is missing."
4775+
<< dendl;
4776+
r_omap = -ENOENT;
4777+
}
4778+
int r_disk = store->read(service.meta_ch, OSD_SUPERBLOCK_GOBJECT, 0, 0, bl);
4779+
if (r_disk >= 0) {
4780+
try {
4781+
auto p = bl.cbegin();
4782+
decode(super_disk, p);
4783+
} catch(...) {
4784+
derr << __func__ << " disk replica is corrupted."
4785+
<< dendl;
4786+
r_disk = -EFAULT;
47724787
}
4773-
dout(10) << __func__ << " got disk replica" << dendl;
47744788
} else {
4775-
std::swap(bl, vals.begin()->second);
4789+
derr << __func__ << " disk replica is missing."
4790+
<< dendl;
4791+
r_disk = -ENOENT;
47764792
}
47774793

4778-
auto p = bl.cbegin();
4779-
decode(superblock, p);
4794+
if (r_omap >= 0 && r_disk < 0) {
4795+
std::swap(superblock, super_omap);
4796+
dout(1) << __func__ << " got omap replica but failed to get disk one."
4797+
<< dendl;
4798+
} else if (r_omap < 0 && r_disk >= 0) {
4799+
std::swap(superblock, super_disk);
4800+
dout(1) << __func__ << " got disk replica but failed to get omap one."
4801+
<< dendl;
4802+
} else if (r_omap < 0 && r_disk < 0) {
4803+
// error to be logged by the caller
4804+
return -ENOENT;
4805+
} else {
4806+
std::swap(superblock, super_omap); // let omap be the primary source
4807+
if (superblock.current_epoch != super_disk.current_epoch) {
4808+
derr << __func__ << " got mismatching superblocks, omap:"
4809+
<< superblock << " vs. disk:" << super_disk
4810+
<< dendl;
4811+
if (superblock.current_epoch < super_disk.current_epoch) {
4812+
std::swap(superblock, super_disk);
4813+
dout(0) << __func__ << " using disk superblock"
4814+
<< dendl;
4815+
} else {
4816+
dout(0) << __func__ << " using omap superblock"
4817+
<< dendl;
4818+
}
4819+
}
4820+
}
47804821

47814822
dout(10) << "read_superblock " << superblock << dendl;
4782-
47834823
return 0;
47844824
}
47854825

0 commit comments

Comments
 (0)