Skip to content

Commit df7decd

Browse files
committed
osd: Move ec_inject interface in ECCommon into ECInject.cc
This is going to be common with the new EC code, so it makes sense to have it in a separate file. It also does not fit in with the class structure of either new or old code. Signed-off-by: Alex Ainscow <[email protected]>
1 parent c9e67c9 commit df7decd

File tree

8 files changed

+374
-324
lines changed

8 files changed

+374
-324
lines changed

src/osd/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,8 @@ set(osd_srcs
4747
ECTransactionL.cc
4848
ECUtilL.cc
4949
ECUtil.cc
50+
ECInject.cc
51+
ECInject.h
5052
${CMAKE_SOURCE_DIR}/src/common/TrackedOp.cc
5153
${CMAKE_SOURCE_DIR}/src/mgr/OSDPerfMetricTypes.cc
5254
${osd_cyg_functions_src}

src/osd/ECBackendL.cc

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
#include <sstream>
1717

1818
#include "ECBackendL.h"
19+
#include "ECInject.h"
1920
#include "messages/MOSDPGPush.h"
2021
#include "messages/MOSDPGPushReply.h"
2122
#include "messages/MOSDECSubOpWrite.h"
@@ -947,7 +948,7 @@ void ECBackendL::handle_sub_write(
947948
trace.event("handle_sub_write");
948949

949950
if (cct->_conf->bluestore_debug_inject_read_err &&
950-
ec_inject_test_write_error3(op.soid)) {
951+
ECInject::test_write_error3(op.soid)) {
951952
ceph_abort_msg("Error inject - OSD down");
952953
}
953954
if (!get_parent()->pgb_is_primary())
@@ -1197,7 +1198,7 @@ void ECBackendL::handle_sub_write_reply(
11971198
}
11981199
if (cct->_conf->bluestore_debug_inject_read_err &&
11991200
(i->second->pending_commit.size() == 1) &&
1200-
ec_inject_test_write_error2(i->second->hoid)) {
1201+
ECInject::test_write_error2(i->second->hoid)) {
12011202
std::string cmd =
12021203
"{ \"prefix\": \"osd down\", \"ids\": [\"" + std::to_string( get_parent()->whoami() ) + "\"] }";
12031204
vector<std::string> vcmd{cmd};
@@ -1225,7 +1226,7 @@ void ECBackendL::handle_sub_read_reply(
12251226
for (auto i = op.buffers_read.begin();
12261227
i != op.buffers_read.end();
12271228
++i) {
1228-
if (ec_inject_test_read_error0(ghobject_t(i->first, ghobject_t::NO_GEN, op.from.shard))) {
1229+
if (ECInject::test_read_error0(ghobject_t(i->first, ghobject_t::NO_GEN, op.from.shard))) {
12291230
dout(0) << __func__ << " Error inject - EIO error for shard " << op.from.shard << dendl;
12301231
op.buffers_read.erase(i->first);
12311232
op.attrs_read.erase(i->first);

src/osd/ECCommonL.cc

Lines changed: 3 additions & 304 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
#include <sstream>
1717

1818
#include "ECCommonL.h"
19+
#include "ECInject.h"
1920
#include "messages/MOSDPGPush.h"
2021
#include "messages/MOSDPGPushReply.h"
2122
#include "messages/MOSDECSubOpWrite.h"
@@ -223,7 +224,7 @@ void ECCommonL::ReadPipeline::get_all_avail_shards(
223224
continue;
224225
}
225226
if (cct->_conf->bluestore_debug_inject_read_err &&
226-
ec_inject_test_read_error1(ghobject_t(hoid, ghobject_t::NO_GEN, i->shard))) {
227+
ECInject::test_read_error1(ghobject_t(hoid, ghobject_t::NO_GEN, i->shard))) {
227228
dout(0) << __func__ << " Error inject - Missing shard " << i->shard << dendl;
228229
continue;
229230
}
@@ -918,7 +919,7 @@ bool ECCommonL::RMWPipeline::try_reads_to_commit()
918919
should_write_local = true;
919920
local_write_op.claim(sop);
920921
} else if (cct->_conf->bluestore_debug_inject_read_err &&
921-
ec_inject_test_write_error1(ghobject_t(op->hoid,
922+
ECInject::test_write_error1(ghobject_t(op->hoid,
922923
ghobject_t::NO_GEN, i->shard))) {
923924
dout(0) << " Error inject - Dropping write message to shard " <<
924925
i->shard << dendl;
@@ -1100,305 +1101,3 @@ ECUtilL::HashInfoRef ECCommonL::UnstableHashInfoRegistry::get_hash_info(
11001101
}
11011102
return ref;
11021103
}
1103-
1104-
// Error inject interfaces
1105-
static ceph::recursive_mutex ec_inject_lock =
1106-
ceph::make_recursive_mutex("ECCommon::ec_inject_lock");
1107-
static std::map<ghobject_t,std::pair<int64_t,int64_t>> ec_inject_read_failures0;
1108-
static std::map<ghobject_t,std::pair<int64_t,int64_t>> ec_inject_read_failures1;
1109-
static std::map<ghobject_t,std::pair<int64_t,int64_t>> ec_inject_write_failures0;
1110-
static std::map<ghobject_t,std::pair<int64_t,int64_t>> ec_inject_write_failures1;
1111-
static std::map<ghobject_t,std::pair<int64_t,int64_t>> ec_inject_write_failures2;
1112-
static std::map<ghobject_t,std::pair<int64_t,int64_t>> ec_inject_write_failures3;
1113-
static std::map<ghobject_t,shard_id_t> ec_inject_write_failures0_shard;
1114-
static std::set<osd_reqid_t> ec_inject_write_failures0_reqid;
1115-
1116-
/**
1117-
* Configure a read error inject that typically forces additional reads of
1118-
* shards in an EC pool to recover data using the redundancy. With multiple
1119-
* errors it is possible to force client reads to fail.
1120-
*
1121-
* Type 0 - Simulate a medium error. Fail a read with -EIO to force
1122-
* additional reads and a decode
1123-
*
1124-
* Type 1 - Simulate a missing OSD. Don't even try to read a shard
1125-
*
1126-
* @brief Set up a read error inject for an object in an EC pool.
1127-
* @param o Target object for the error inject.
1128-
* @param when Error inject starts after this many object store reads.
1129-
* @param duration Error inject affects this many object store reads.
1130-
* @param type Type of error inject 0 = EIO, 1 = missing shard.
1131-
* @return string Result of configuring the error inject.
1132-
*/
1133-
std::string ec_inject_read_error(const ghobject_t& o,
1134-
const int64_t type,
1135-
const int64_t when,
1136-
const int64_t duration) {
1137-
std::lock_guard<ceph::recursive_mutex> l(ec_inject_lock);
1138-
ghobject_t os = o;
1139-
if (os.hobj.oid.name == "*") {
1140-
os.hobj.set_hash(0);
1141-
}
1142-
switch (type) {
1143-
case 0:
1144-
ec_inject_read_failures0[os] = std::pair(when, duration);
1145-
return "ok - read returns EIO";
1146-
case 1:
1147-
ec_inject_read_failures1[os] = std::pair(when, duration);
1148-
return "ok - read pretends shard is missing";
1149-
default:
1150-
break;
1151-
}
1152-
return "unrecognized error inject type";
1153-
}
1154-
1155-
/**
1156-
* Configure a write error inject that either fails an OSD or causes a
1157-
* client write operation to be rolled back.
1158-
*
1159-
* Type 0 - Tests rollback. Drop a write I/O to a shard, then simulate an OSD
1160-
* down to force rollback to occur, lastly fail the retried write from the
1161-
* client so the results of the rollback can be inspected.
1162-
*
1163-
* Type 1 - Drop a write I/O to a shard. Used on its own this will hang a
1164-
* write I/O.
1165-
*
1166-
* Type 2 - Simulate an OSD down (ceph osd down) to force a new epoch. Usually
1167-
* used together with type 1 to force a rollback
1168-
*
1169-
* Type 3 - Abort when an OSD processes a write I/O to a shard. Typically the
1170-
* client write will be committed while the OSD is absent which will result in
1171-
* recovery or backfill later when the OSD returns.
1172-
*
1173-
* @brief Set up a write error inject for an object in an EC pool.
1174-
* @param o Target object for the error inject.
1175-
* @param when Error inject starts after this many object store reads.
1176-
* @param duration Error inject affects this many object store reads.
1177-
* @param type Type of error inject 0 = rollback test, 1 = drop write to shard, 2 = OSD down, 3 = abort OSD.
1178-
* @return string Result of configuring the error inject.
1179-
*/
1180-
std::string ec_inject_write_error(const ghobject_t& o,
1181-
const int64_t type,
1182-
const int64_t when,
1183-
const int64_t duration) {
1184-
std::lock_guard<ceph::recursive_mutex> l(ec_inject_lock);
1185-
std::map<ghobject_t,std::pair<int64_t,int64_t>> *failures;
1186-
ghobject_t os = o;
1187-
bool no_shard = true;
1188-
std::string result;
1189-
switch (type) {
1190-
case 0:
1191-
failures = &ec_inject_write_failures0;
1192-
result = "ok - drop write, sim OSD down and fail client retry with EINVAL";
1193-
break;
1194-
case 1:
1195-
failures = &ec_inject_write_failures1;
1196-
no_shard = false;
1197-
result = "ok - drop write to shard";
1198-
break;
1199-
case 2:
1200-
failures = &ec_inject_write_failures2;
1201-
result = "ok - inject OSD down";
1202-
break;
1203-
case 3:
1204-
if (duration != 1) {
1205-
return "duration must be 1";
1206-
}
1207-
failures = &ec_inject_write_failures3;
1208-
result = "ok - write abort OSDs";
1209-
break;
1210-
default:
1211-
return "unrecognized error inject type";
1212-
}
1213-
if (no_shard) {
1214-
os.set_shard(shard_id_t::NO_SHARD);
1215-
}
1216-
if (os.hobj.oid.name == "*") {
1217-
os.hobj.set_hash(0);
1218-
}
1219-
(*failures)[os] = std::pair(when, duration);
1220-
if (type == 0) {
1221-
ec_inject_write_failures0_shard[os] = o.shard_id;
1222-
}
1223-
return result;
1224-
}
1225-
1226-
/**
1227-
* @brief Clear a previously configured read error inject.
1228-
* @param o Target object for the error inject.
1229-
* @param type Type of error inject 0 = EIO, 1 = missing shard.
1230-
* @return string Indication of how many errors were cleared.
1231-
*/
1232-
std::string ec_inject_clear_read_error(const ghobject_t& o,
1233-
const int64_t type) {
1234-
std::lock_guard<ceph::recursive_mutex> l(ec_inject_lock);
1235-
std::map<ghobject_t,std::pair<int64_t,int64_t>> *failures;
1236-
ghobject_t os = o;
1237-
int64_t remaining = 0;
1238-
switch (type) {
1239-
case 0:
1240-
failures = &ec_inject_read_failures0;
1241-
break;
1242-
case 1:
1243-
failures = &ec_inject_read_failures1;
1244-
break;
1245-
default:
1246-
return "unrecognized error inject type";
1247-
}
1248-
if (os.hobj.oid.name == "*") {
1249-
os.hobj.set_hash(0);
1250-
}
1251-
auto it = failures->find(os);
1252-
if (it != failures->end()) {
1253-
remaining = it->second.second;
1254-
failures->erase(it);
1255-
}
1256-
if (remaining == 0) {
1257-
return "no outstanding error injects";
1258-
} else if (remaining == 1) {
1259-
return "ok - 1 inject cleared";
1260-
}
1261-
return "ok - " + std::to_string(remaining) + " injects cleared";
1262-
}
1263-
1264-
/**
1265-
* @brief Clear a previously configured write error inject.
1266-
* @param o Target object for the error inject.
1267-
* @param type Type of error inject 0 = rollback test, 1 = drop write to shard, 2 = OSD down, 3 = abort OSD.
1268-
* @return string Indication of how many errors were cleared.
1269-
*/
1270-
std::string ec_inject_clear_write_error(const ghobject_t& o,
1271-
const int64_t type) {
1272-
std::lock_guard<ceph::recursive_mutex> l(ec_inject_lock);
1273-
std::map<ghobject_t,std::pair<int64_t,int64_t>> *failures;
1274-
ghobject_t os = o;
1275-
bool no_shard = true;
1276-
int64_t remaining = 0;
1277-
switch (type) {
1278-
case 0:
1279-
failures = &ec_inject_write_failures0;
1280-
break;
1281-
case 1:
1282-
failures = &ec_inject_write_failures1;
1283-
no_shard = false;
1284-
break;
1285-
case 2:
1286-
failures = &ec_inject_write_failures2;
1287-
break;
1288-
case 3:
1289-
failures = &ec_inject_write_failures3;
1290-
break;
1291-
default:
1292-
return "unrecognized error inject type";
1293-
}
1294-
if (no_shard) {
1295-
os.set_shard(shard_id_t::NO_SHARD);
1296-
}
1297-
if (os.hobj.oid.name == "*") {
1298-
os.hobj.set_hash(0);
1299-
}
1300-
auto it = failures->find(os);
1301-
if (it != failures->end()) {
1302-
remaining = it->second.second;
1303-
failures->erase(it);
1304-
if (type == 0) {
1305-
ec_inject_write_failures0_shard.erase(os);
1306-
}
1307-
}
1308-
if (remaining == 0) {
1309-
return "no outstanding error injects";
1310-
} else if (remaining == 1) {
1311-
return "ok - 1 inject cleared";
1312-
}
1313-
return "ok - " + std::to_string(remaining) + " injects cleared";
1314-
}
1315-
1316-
static bool ec_inject_test_error(const ghobject_t& o,
1317-
std::map<ghobject_t,std::pair<int64_t,int64_t>> *failures)
1318-
{
1319-
std::lock_guard<ceph::recursive_mutex> l(ec_inject_lock);
1320-
auto it = failures->find(o);
1321-
if (it == failures->end()) {
1322-
ghobject_t os = o;
1323-
os.hobj.oid.name = "*";
1324-
os.hobj.set_hash(0);
1325-
it = failures->find(os);
1326-
}
1327-
if (it != failures->end()) {
1328-
auto && [when,duration] = it->second;
1329-
if (when > 0) {
1330-
when--;
1331-
return false;
1332-
}
1333-
if (--duration <= 0) {
1334-
failures->erase(it);
1335-
}
1336-
return true;
1337-
}
1338-
return false;
1339-
}
1340-
1341-
bool ec_inject_test_read_error0(const ghobject_t& o)
1342-
{
1343-
return ec_inject_test_error(o, &ec_inject_read_failures0);
1344-
}
1345-
1346-
bool ec_inject_test_read_error1(const ghobject_t& o)
1347-
{
1348-
return ec_inject_test_error(o, &ec_inject_read_failures1);
1349-
}
1350-
1351-
bool ec_inject_test_write_error0(const hobject_t& o,
1352-
const osd_reqid_t& reqid) {
1353-
std::lock_guard<ceph::recursive_mutex> l(ec_inject_lock);
1354-
ghobject_t os = ghobject_t(o, ghobject_t::NO_GEN, shard_id_t::NO_SHARD);
1355-
if (ec_inject_write_failures0_reqid.count(reqid)) {
1356-
// Matched reqid of retried write - flag for failure
1357-
ec_inject_write_failures0_reqid.erase(reqid);
1358-
return true;
1359-
}
1360-
auto it = ec_inject_write_failures0.find(os);
1361-
if (it == ec_inject_write_failures0.end()) {
1362-
os.hobj.oid.name = "*";
1363-
os.hobj.set_hash(0);
1364-
it = ec_inject_write_failures0.find(os);
1365-
}
1366-
if (it != ec_inject_write_failures0.end()) {
1367-
auto && [when, duration] = it->second;
1368-
auto shard = ec_inject_write_failures0_shard.find(os)->second;
1369-
if (when > 0) {
1370-
when--;
1371-
} else {
1372-
if (--duration <= 0) {
1373-
ec_inject_write_failures0.erase(it);
1374-
ec_inject_write_failures0_shard.erase(os);
1375-
}
1376-
// Error inject triggered - save reqid
1377-
ec_inject_write_failures0_reqid.insert(reqid);
1378-
// Set up error inject to drop message to primary
1379-
ec_inject_write_error(ghobject_t(o, ghobject_t::NO_GEN, shard), 1, 0, 1);
1380-
}
1381-
}
1382-
return false;
1383-
}
1384-
1385-
bool ec_inject_test_write_error1(const ghobject_t& o) {
1386-
bool rc = ec_inject_test_error(o, &ec_inject_write_failures1);
1387-
if (rc) {
1388-
// Set up error inject to generate OSD down
1389-
ec_inject_write_error(o, 2, 0, 1);
1390-
}
1391-
return rc;
1392-
}
1393-
1394-
bool ec_inject_test_write_error2(const hobject_t& o) {
1395-
return ec_inject_test_error(
1396-
ghobject_t(o, ghobject_t::NO_GEN, shard_id_t::NO_SHARD),
1397-
&ec_inject_write_failures2);
1398-
}
1399-
1400-
bool ec_inject_test_write_error3(const hobject_t& o) {
1401-
return ec_inject_test_error(
1402-
ghobject_t(o, ghobject_t::NO_GEN, shard_id_t::NO_SHARD),
1403-
&ec_inject_write_failures3);
1404-
}

src/osd/ECCommonL.h

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -683,15 +683,3 @@ void ECCommonL::ReadPipeline::filter_read_op(
683683
on_schedule_recovery(op);
684684
}
685685
}
686-
687-
// Error inject interfaces
688-
std::string ec_inject_read_error(const ghobject_t& o, const int64_t type, const int64_t when, const int64_t duration);
689-
std::string ec_inject_write_error(const ghobject_t& o, const int64_t type, const int64_t when, const int64_t duration);
690-
std::string ec_inject_clear_read_error(const ghobject_t& o, const int64_t type);
691-
std::string ec_inject_clear_write_error(const ghobject_t& o, const int64_t type);
692-
bool ec_inject_test_read_error0(const ghobject_t& o);
693-
bool ec_inject_test_read_error1(const ghobject_t& o);
694-
bool ec_inject_test_write_error0(const hobject_t& o,const osd_reqid_t& reqid);
695-
bool ec_inject_test_write_error1(const ghobject_t& o);
696-
bool ec_inject_test_write_error2(const hobject_t& o);
697-
bool ec_inject_test_write_error3(const hobject_t& o);

0 commit comments

Comments
 (0)