Skip to content

Commit 23dd255

Browse files
bill-scales authored and JonBailey1993 committed
osd: EC error inject interfaces
Error inject interfaces for EC reads and writes using ceph tell osd.<n> interface Signed-off-by: Bill Scales <[email protected]>
1 parent 4f3ef50 commit 23dd255

File tree

7 files changed

+466
-6
lines changed

7 files changed

+466
-6
lines changed

src/osd/ECBackend.cc

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -945,6 +945,10 @@ void ECBackend::handle_sub_write(
945945
}
946946
trace.event("handle_sub_write");
947947

948+
if (cct->_conf->bluestore_debug_inject_read_err &&
949+
ec_inject_test_write_error3(op.soid)) {
950+
ceph_abort_msg("Error inject - OSD down");
951+
}
948952
if (!get_parent()->pgb_is_primary())
949953
get_parent()->update_stats(op.stats);
950954
ObjectStore::Transaction localt;
@@ -1191,6 +1195,15 @@ void ECBackend::handle_sub_write_reply(
11911195
i->second->on_all_commit = 0;
11921196
i->second->trace.event("ec write all committed");
11931197
}
1198+
if (cct->_conf->bluestore_debug_inject_read_err &&
1199+
(i->second->pending_commit.size() == 1) &&
1200+
ec_inject_test_write_error2(i->second->hoid)) {
1201+
std::string cmd =
1202+
"{ \"prefix\": \"osd down\", \"ids\": [\"" + std::to_string( get_parent()->whoami() ) + "\"] }";
1203+
vector<std::string> vcmd{cmd};
1204+
dout(0) << __func__ << " Error inject - marking OSD down" << dendl;
1205+
get_parent()->start_mon_command(vcmd, {}, nullptr, nullptr, nullptr);
1206+
}
11941207
rmw_pipeline.check_ops();
11951208
}
11961209

@@ -1208,6 +1221,19 @@ void ECBackend::handle_sub_read_reply(
12081221
return;
12091222
}
12101223
ReadOp &rop = iter->second;
1224+
if (cct->_conf->bluestore_debug_inject_read_err) {
1225+
for (auto i = op.buffers_read.begin();
1226+
i != op.buffers_read.end();
1227+
++i) {
1228+
if (ec_inject_test_read_error0(ghobject_t(i->first, ghobject_t::NO_GEN, op.from.shard))) {
1229+
dout(0) << __func__ << " Error inject - EIO error for shard " << op.from.shard << dendl;
1230+
op.buffers_read.erase(i->first);
1231+
op.attrs_read.erase(i->first);
1232+
op.errors[i->first] = -EIO;
1233+
}
1234+
1235+
}
1236+
}
12111237
for (auto i = op.buffers_read.begin();
12121238
i != op.buffers_read.end();
12131239
++i) {

src/osd/ECCommon.cc

Lines changed: 314 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -226,8 +226,14 @@ void ECCommon::ReadPipeline::get_all_avail_shards(
226226
++i) {
227227
dout(10) << __func__ << ": checking acting " << *i << dendl;
228228
const pg_missing_t &missing = get_parent()->get_shard_missing(*i);
229-
if (error_shards.find(*i) != error_shards.end())
229+
if (error_shards.contains(*i)) {
230230
continue;
231+
}
232+
if (cct->_conf->bluestore_debug_inject_read_err &&
233+
ec_inject_test_read_error1(ghobject_t(hoid, ghobject_t::NO_GEN, i->shard))) {
234+
dout(0) << __func__ << " Error inject - Missing shard " << i->shard << dendl;
235+
continue;
236+
}
231237
if (!missing.is_missing(hoid)) {
232238
ceph_assert(!have.count(i->shard));
233239
have.insert(i->shard);
@@ -912,6 +918,11 @@ bool ECCommon::RMWPipeline::try_reads_to_commit()
912918
if (*i == get_parent()->whoami_shard()) {
913919
should_write_local = true;
914920
local_write_op.claim(sop);
921+
} else if (cct->_conf->bluestore_debug_inject_read_err &&
922+
ec_inject_test_write_error1(ghobject_t(op->hoid,
923+
ghobject_t::NO_GEN, i->shard))) {
924+
dout(0) << " Error inject - Dropping write message to shard " <<
925+
i->shard << dendl;
915926
} else {
916927
MOSDECSubOpWrite *r = new MOSDECSubOpWrite(sop);
917928
r->pgid = spg_t(get_parent()->primary_spg_t().pgid, i->shard);
@@ -1090,3 +1101,305 @@ ECUtil::HashInfoRef ECCommon::UnstableHashInfoRegistry::get_hash_info(
10901101
}
10911102
return ref;
10921103
}
1104+
1105+
// Error inject interfaces
1106+
static ceph::recursive_mutex ec_inject_lock =
1107+
ceph::make_recursive_mutex("ECCommon::ec_inject_lock");
1108+
static std::map<ghobject_t,std::pair<int64_t,int64_t>> ec_inject_read_failures0;
1109+
static std::map<ghobject_t,std::pair<int64_t,int64_t>> ec_inject_read_failures1;
1110+
static std::map<ghobject_t,std::pair<int64_t,int64_t>> ec_inject_write_failures0;
1111+
static std::map<ghobject_t,std::pair<int64_t,int64_t>> ec_inject_write_failures1;
1112+
static std::map<ghobject_t,std::pair<int64_t,int64_t>> ec_inject_write_failures2;
1113+
static std::map<ghobject_t,std::pair<int64_t,int64_t>> ec_inject_write_failures3;
1114+
static std::map<ghobject_t,shard_id_t> ec_inject_write_failures0_shard;
1115+
static std::set<osd_reqid_t> ec_inject_write_failures0_reqid;
1116+
1117+
/**
1118+
* Configure a read error inject that typically forces additional reads of
1119+
* shards in an EC pool to recover data using the redundancy. With multiple
1120+
* errors it is possible to force client reads to fail.
1121+
*
1122+
* Type 0 - Simulate a medium error. Fail a read with -EIO to force
1123+
* additional reads and a decode
1124+
*
1125+
* Type 1 - Simulate a missing OSD. Dont even try to read a shard
1126+
*
1127+
* @brief Set up a read error inject for an object in an EC pool.
1128+
* @param o Target object for the error inject.
1129+
* @param when Error inject starts after this many object store reads.
1130+
* @param duration Error inject affects this many object store reads.
1131+
* @param type Type of error inject 0 = EIO, 1 = missing shard.
1132+
* @return string Result of configuring the error inject.
1133+
*/
1134+
std::string ec_inject_read_error(const ghobject_t& o,
1135+
const int64_t type,
1136+
const int64_t when,
1137+
const int64_t duration) {
1138+
std::lock_guard<ceph::recursive_mutex> l(ec_inject_lock);
1139+
ghobject_t os = o;
1140+
if (os.hobj.oid.name == "*") {
1141+
os.hobj.set_hash(0);
1142+
}
1143+
switch (type) {
1144+
case 0:
1145+
ec_inject_read_failures0[os] = std::pair(when, duration);
1146+
return "ok - read returns EIO";
1147+
case 1:
1148+
ec_inject_read_failures1[os] = std::pair(when, duration);
1149+
return "ok - read pretends shard is missing";
1150+
default:
1151+
break;
1152+
}
1153+
return "unrecognized error inject type";
1154+
}
1155+
1156+
/**
1157+
* Configure a write error inject that either fails an OSD or causes a
1158+
* client write operation to be rolled back.
1159+
*
1160+
* Type 0 - Tests rollback. Drop a write I/O to a shard, then simulate an OSD
1161+
* down to force rollback to occur, lastly fail the retried write from the
1162+
* client so the results of the rollback can be inspected.
1163+
*
1164+
* Type 1 - Drop a write I/O to a shard. Used on its own this will hang a
1165+
* write I/O.
1166+
*
1167+
* Type 2 - Simulate an OSD down (ceph osd down) to force a new epoch. Usually
1168+
* used together with type 1 to force a rollback
1169+
*
1170+
* Type 3 - Abort when an OSD processes a write I/O to a shard. Typically the
1171+
* client write will be commited while the OSD is absent which will result in
1172+
* recovery or backfill later when the OSD returns.
1173+
*
1174+
* @brief Set up a write error inject for an object in an EC pool.
1175+
* @param o Target object for the error inject.
1176+
* @param when Error inject starts after this many object store reads.
1177+
* @param duration Error inject affects this many object store reads.
1178+
* @param type Type of error inject 0 = EIO, 1 = missing shard.
1179+
* @return string Result of configuring the error inect.
1180+
*/
1181+
std::string ec_inject_write_error(const ghobject_t& o,
1182+
const int64_t type,
1183+
const int64_t when,
1184+
const int64_t duration) {
1185+
std::lock_guard<ceph::recursive_mutex> l(ec_inject_lock);
1186+
std::map<ghobject_t,std::pair<int64_t,int64_t>> *failures;
1187+
ghobject_t os = o;
1188+
bool no_shard = true;
1189+
std::string result;
1190+
switch (type) {
1191+
case 0:
1192+
failures = &ec_inject_write_failures0;
1193+
result = "ok - drop write, sim OSD down and fail client retry with EINVAL";
1194+
break;
1195+
case 1:
1196+
failures = &ec_inject_write_failures1;
1197+
no_shard = false;
1198+
result = "ok - drop write to shard";
1199+
break;
1200+
case 2:
1201+
failures = &ec_inject_write_failures2;
1202+
result = "ok - inject OSD down";
1203+
break;
1204+
case 3:
1205+
if (duration != 1) {
1206+
return "duration must be 1";
1207+
}
1208+
failures = &ec_inject_write_failures3;
1209+
result = "ok - write abort OSDs";
1210+
break;
1211+
default:
1212+
return "unrecognized error inject type";
1213+
}
1214+
if (no_shard) {
1215+
os.set_shard(shard_id_t::NO_SHARD);
1216+
}
1217+
if (os.hobj.oid.name == "*") {
1218+
os.hobj.set_hash(0);
1219+
}
1220+
(*failures)[os] = std::pair(when, duration);
1221+
if (type == 0) {
1222+
ec_inject_write_failures0_shard[os] = o.shard_id;
1223+
}
1224+
return result;
1225+
}
1226+
1227+
/**
1228+
* @brief Clear a previously configured read error inject.
1229+
* @param o Target object for the error inject.
1230+
* @param type Type of error inject 0 = EIO, 1 = missing shard.
1231+
* @return string Indication of how many errors were cleared.
1232+
*/
1233+
std::string ec_inject_clear_read_error(const ghobject_t& o,
1234+
const int64_t type) {
1235+
std::lock_guard<ceph::recursive_mutex> l(ec_inject_lock);
1236+
std::map<ghobject_t,std::pair<int64_t,int64_t>> *failures;
1237+
ghobject_t os = o;
1238+
int64_t remaining = 0;
1239+
switch (type) {
1240+
case 0:
1241+
failures = &ec_inject_read_failures0;
1242+
break;
1243+
case 1:
1244+
failures = &ec_inject_read_failures1;
1245+
break;
1246+
default:
1247+
return "unrecognized error inject type";
1248+
}
1249+
if (os.hobj.oid.name == "*") {
1250+
os.hobj.set_hash(0);
1251+
}
1252+
auto it = failures->find(os);
1253+
if (it != failures->end()) {
1254+
remaining = it->second.second;
1255+
failures->erase(it);
1256+
}
1257+
if (remaining == 0) {
1258+
return "no outstanding error injects";
1259+
} else if (remaining == 1) {
1260+
return "ok - 1 inject cleared";
1261+
}
1262+
return "ok - " + std::to_string(remaining) + " injects cleared";
1263+
}
1264+
1265+
/**
1266+
* @brief Clear a previously configured write error inject.
1267+
* @param o Target object for the error inject.
1268+
* @param type Type of error inject 0 = EIO, 1 = missing shard.
1269+
* @return string Indication of how many errors were cleared.
1270+
*/
1271+
std::string ec_inject_clear_write_error(const ghobject_t& o,
1272+
const int64_t type) {
1273+
std::lock_guard<ceph::recursive_mutex> l(ec_inject_lock);
1274+
std::map<ghobject_t,std::pair<int64_t,int64_t>> *failures;
1275+
ghobject_t os = o;
1276+
bool no_shard = true;
1277+
int64_t remaining = 0;
1278+
switch (type) {
1279+
case 0:
1280+
failures = &ec_inject_write_failures0;
1281+
break;
1282+
case 1:
1283+
failures = &ec_inject_write_failures1;
1284+
no_shard = false;
1285+
break;
1286+
case 2:
1287+
failures = &ec_inject_write_failures2;
1288+
break;
1289+
case 3:
1290+
failures = &ec_inject_write_failures3;
1291+
break;
1292+
default:
1293+
return "unrecognized error inject type";
1294+
}
1295+
if (no_shard) {
1296+
os.set_shard(shard_id_t::NO_SHARD);
1297+
}
1298+
if (os.hobj.oid.name == "*") {
1299+
os.hobj.set_hash(0);
1300+
}
1301+
auto it = failures->find(os);
1302+
if (it != failures->end()) {
1303+
remaining = it->second.second;
1304+
failures->erase(it);
1305+
if (type == 0) {
1306+
ec_inject_write_failures0_shard.erase(os);
1307+
}
1308+
}
1309+
if (remaining == 0) {
1310+
return "no outstanding error injects";
1311+
} else if (remaining == 1) {
1312+
return "ok - 1 inject cleared";
1313+
}
1314+
return "ok - " + std::to_string(remaining) + " injects cleared";
1315+
}
1316+
1317+
static bool ec_inject_test_error(const ghobject_t& o,
1318+
std::map<ghobject_t,std::pair<int64_t,int64_t>> *failures)
1319+
{
1320+
std::lock_guard<ceph::recursive_mutex> l(ec_inject_lock);
1321+
auto it = failures->find(o);
1322+
if (it == failures->end()) {
1323+
ghobject_t os = o;
1324+
os.hobj.oid.name = "*";
1325+
os.hobj.set_hash(0);
1326+
it = failures->find(os);
1327+
}
1328+
if (it != failures->end()) {
1329+
auto && [when,duration] = it->second;
1330+
if (when > 0) {
1331+
when--;
1332+
return false;
1333+
}
1334+
if (--duration <= 0) {
1335+
failures->erase(it);
1336+
}
1337+
return true;
1338+
}
1339+
return false;
1340+
}
1341+
1342+
bool ec_inject_test_read_error0(const ghobject_t& o)
1343+
{
1344+
return ec_inject_test_error(o, &ec_inject_read_failures0);
1345+
}
1346+
1347+
bool ec_inject_test_read_error1(const ghobject_t& o)
1348+
{
1349+
return ec_inject_test_error(o, &ec_inject_read_failures1);
1350+
}
1351+
1352+
bool ec_inject_test_write_error0(const hobject_t& o,
1353+
const osd_reqid_t& reqid) {
1354+
std::lock_guard<ceph::recursive_mutex> l(ec_inject_lock);
1355+
ghobject_t os = ghobject_t(o, ghobject_t::NO_GEN, shard_id_t::NO_SHARD);
1356+
if (ec_inject_write_failures0_reqid.count(reqid)) {
1357+
// Matched reqid of retried write - flag for failure
1358+
ec_inject_write_failures0_reqid.erase(reqid);
1359+
return true;
1360+
}
1361+
auto it = ec_inject_write_failures0.find(os);
1362+
if (it == ec_inject_write_failures0.end()) {
1363+
os.hobj.oid.name = "*";
1364+
os.hobj.set_hash(0);
1365+
it = ec_inject_write_failures0.find(os);
1366+
}
1367+
if (it != ec_inject_write_failures0.end()) {
1368+
auto && [when, duration] = it->second;
1369+
auto shard = ec_inject_write_failures0_shard.find(os)->second;
1370+
if (when > 0) {
1371+
when--;
1372+
} else {
1373+
if (--duration <= 0) {
1374+
ec_inject_write_failures0.erase(it);
1375+
ec_inject_write_failures0_shard.erase(os);
1376+
}
1377+
// Error inject triggered - save reqid
1378+
ec_inject_write_failures0_reqid.insert(reqid);
1379+
// Set up error inject to drop message to primary
1380+
ec_inject_write_error(ghobject_t(o, ghobject_t::NO_GEN, shard), 1, 0, 1);
1381+
}
1382+
}
1383+
return false;
1384+
}
1385+
1386+
bool ec_inject_test_write_error1(const ghobject_t& o) {
1387+
bool rc = ec_inject_test_error(o, &ec_inject_write_failures1);
1388+
if (rc) {
1389+
// Set up error inject to generate OSD down
1390+
ec_inject_write_error(o, 2, 0, 1);
1391+
}
1392+
return rc;
1393+
}
1394+
1395+
bool ec_inject_test_write_error2(const hobject_t& o) {
1396+
return ec_inject_test_error(
1397+
ghobject_t(o, ghobject_t::NO_GEN, shard_id_t::NO_SHARD),
1398+
&ec_inject_write_failures2);
1399+
}
1400+
1401+
bool ec_inject_test_write_error3(const hobject_t& o) {
1402+
return ec_inject_test_error(
1403+
ghobject_t(o, ghobject_t::NO_GEN, shard_id_t::NO_SHARD),
1404+
&ec_inject_write_failures3);
1405+
}

0 commit comments

Comments
 (0)