@@ -226,8 +226,14 @@ void ECCommon::ReadPipeline::get_all_avail_shards(
226226 ++i) {
227227 dout (10 ) << __func__ << " : checking acting " << *i << dendl;
228228 const pg_missing_t &missing = get_parent ()->get_shard_missing (*i);
229- if (error_shards.find (*i) != error_shards. end ())
229+ if (error_shards.contains (*i)) {
230230 continue ;
231+ }
232+ if (cct->_conf ->bluestore_debug_inject_read_err &&
233+ ec_inject_test_read_error1 (ghobject_t (hoid, ghobject_t ::NO_GEN, i->shard ))) {
234+ dout (0 ) << __func__ << " Error inject - Missing shard " << i->shard << dendl;
235+ continue ;
236+ }
231237 if (!missing.is_missing (hoid)) {
232238 ceph_assert (!have.count (i->shard ));
233239 have.insert (i->shard );
@@ -912,6 +918,11 @@ bool ECCommon::RMWPipeline::try_reads_to_commit()
912918 if (*i == get_parent ()->whoami_shard ()) {
913919 should_write_local = true ;
914920 local_write_op.claim (sop);
921+ } else if (cct->_conf ->bluestore_debug_inject_read_err &&
922+ ec_inject_test_write_error1 (ghobject_t (op->hoid ,
923+ ghobject_t ::NO_GEN, i->shard ))) {
924+ dout (0 ) << " Error inject - Dropping write message to shard " <<
925+ i->shard << dendl;
915926 } else {
916927 MOSDECSubOpWrite *r = new MOSDECSubOpWrite (sop);
917928 r->pgid = spg_t (get_parent ()->primary_spg_t ().pgid , i->shard );
@@ -1090,3 +1101,305 @@ ECUtil::HashInfoRef ECCommon::UnstableHashInfoRegistry::get_hash_info(
10901101 }
10911102 return ref;
10921103}
1104+
1105+ // Error inject interfaces
1106+ static ceph::recursive_mutex ec_inject_lock =
1107+ ceph::make_recursive_mutex (" ECCommon::ec_inject_lock" );
1108+ static std::map<ghobject_t ,std::pair<int64_t ,int64_t >> ec_inject_read_failures0;
1109+ static std::map<ghobject_t ,std::pair<int64_t ,int64_t >> ec_inject_read_failures1;
1110+ static std::map<ghobject_t ,std::pair<int64_t ,int64_t >> ec_inject_write_failures0;
1111+ static std::map<ghobject_t ,std::pair<int64_t ,int64_t >> ec_inject_write_failures1;
1112+ static std::map<ghobject_t ,std::pair<int64_t ,int64_t >> ec_inject_write_failures2;
1113+ static std::map<ghobject_t ,std::pair<int64_t ,int64_t >> ec_inject_write_failures3;
1114+ static std::map<ghobject_t ,shard_id_t > ec_inject_write_failures0_shard;
1115+ static std::set<osd_reqid_t > ec_inject_write_failures0_reqid;
1116+
1117+ /* *
1118+ * Configure a read error inject that typically forces additional reads of
1119+ * shards in an EC pool to recover data using the redundancy. With multiple
1120+ * errors it is possible to force client reads to fail.
1121+ *
1122+ * Type 0 - Simulate a medium error. Fail a read with -EIO to force
1123+ * additional reads and a decode
1124+ *
1125+ * Type 1 - Simulate a missing OSD. Dont even try to read a shard
1126+ *
1127+ * @brief Set up a read error inject for an object in an EC pool.
1128+ * @param o Target object for the error inject.
1129+ * @param when Error inject starts after this many object store reads.
1130+ * @param duration Error inject affects this many object store reads.
1131+ * @param type Type of error inject 0 = EIO, 1 = missing shard.
1132+ * @return string Result of configuring the error inject.
1133+ */
1134+ std::string ec_inject_read_error (const ghobject_t & o,
1135+ const int64_t type,
1136+ const int64_t when,
1137+ const int64_t duration) {
1138+ std::lock_guard<ceph::recursive_mutex> l (ec_inject_lock);
1139+ ghobject_t os = o;
1140+ if (os.hobj .oid .name == " *" ) {
1141+ os.hobj .set_hash (0 );
1142+ }
1143+ switch (type) {
1144+ case 0 :
1145+ ec_inject_read_failures0[os] = std::pair (when, duration);
1146+ return " ok - read returns EIO" ;
1147+ case 1 :
1148+ ec_inject_read_failures1[os] = std::pair (when, duration);
1149+ return " ok - read pretends shard is missing" ;
1150+ default :
1151+ break ;
1152+ }
1153+ return " unrecognized error inject type" ;
1154+ }
1155+
1156+ /* *
1157+ * Configure a write error inject that either fails an OSD or causes a
1158+ * client write operation to be rolled back.
1159+ *
1160+ * Type 0 - Tests rollback. Drop a write I/O to a shard, then simulate an OSD
1161+ * down to force rollback to occur, lastly fail the retried write from the
1162+ * client so the results of the rollback can be inspected.
1163+ *
1164+ * Type 1 - Drop a write I/O to a shard. Used on its own this will hang a
1165+ * write I/O.
1166+ *
1167+ * Type 2 - Simulate an OSD down (ceph osd down) to force a new epoch. Usually
1168+ * used together with type 1 to force a rollback
1169+ *
1170+ * Type 3 - Abort when an OSD processes a write I/O to a shard. Typically the
1171+ * client write will be commited while the OSD is absent which will result in
1172+ * recovery or backfill later when the OSD returns.
1173+ *
1174+ * @brief Set up a write error inject for an object in an EC pool.
1175+ * @param o Target object for the error inject.
1176+ * @param when Error inject starts after this many object store reads.
1177+ * @param duration Error inject affects this many object store reads.
1178+ * @param type Type of error inject 0 = EIO, 1 = missing shard.
1179+ * @return string Result of configuring the error inect.
1180+ */
1181+ std::string ec_inject_write_error (const ghobject_t & o,
1182+ const int64_t type,
1183+ const int64_t when,
1184+ const int64_t duration) {
1185+ std::lock_guard<ceph::recursive_mutex> l (ec_inject_lock);
1186+ std::map<ghobject_t ,std::pair<int64_t ,int64_t >> *failures;
1187+ ghobject_t os = o;
1188+ bool no_shard = true ;
1189+ std::string result;
1190+ switch (type) {
1191+ case 0 :
1192+ failures = &ec_inject_write_failures0;
1193+ result = " ok - drop write, sim OSD down and fail client retry with EINVAL" ;
1194+ break ;
1195+ case 1 :
1196+ failures = &ec_inject_write_failures1;
1197+ no_shard = false ;
1198+ result = " ok - drop write to shard" ;
1199+ break ;
1200+ case 2 :
1201+ failures = &ec_inject_write_failures2;
1202+ result = " ok - inject OSD down" ;
1203+ break ;
1204+ case 3 :
1205+ if (duration != 1 ) {
1206+ return " duration must be 1" ;
1207+ }
1208+ failures = &ec_inject_write_failures3;
1209+ result = " ok - write abort OSDs" ;
1210+ break ;
1211+ default :
1212+ return " unrecognized error inject type" ;
1213+ }
1214+ if (no_shard) {
1215+ os.set_shard (shard_id_t ::NO_SHARD);
1216+ }
1217+ if (os.hobj .oid .name == " *" ) {
1218+ os.hobj .set_hash (0 );
1219+ }
1220+ (*failures)[os] = std::pair (when, duration);
1221+ if (type == 0 ) {
1222+ ec_inject_write_failures0_shard[os] = o.shard_id ;
1223+ }
1224+ return result;
1225+ }
1226+
1227+ /* *
1228+ * @brief Clear a previously configured read error inject.
1229+ * @param o Target object for the error inject.
1230+ * @param type Type of error inject 0 = EIO, 1 = missing shard.
1231+ * @return string Indication of how many errors were cleared.
1232+ */
1233+ std::string ec_inject_clear_read_error (const ghobject_t & o,
1234+ const int64_t type) {
1235+ std::lock_guard<ceph::recursive_mutex> l (ec_inject_lock);
1236+ std::map<ghobject_t ,std::pair<int64_t ,int64_t >> *failures;
1237+ ghobject_t os = o;
1238+ int64_t remaining = 0 ;
1239+ switch (type) {
1240+ case 0 :
1241+ failures = &ec_inject_read_failures0;
1242+ break ;
1243+ case 1 :
1244+ failures = &ec_inject_read_failures1;
1245+ break ;
1246+ default :
1247+ return " unrecognized error inject type" ;
1248+ }
1249+ if (os.hobj .oid .name == " *" ) {
1250+ os.hobj .set_hash (0 );
1251+ }
1252+ auto it = failures->find (os);
1253+ if (it != failures->end ()) {
1254+ remaining = it->second .second ;
1255+ failures->erase (it);
1256+ }
1257+ if (remaining == 0 ) {
1258+ return " no outstanding error injects" ;
1259+ } else if (remaining == 1 ) {
1260+ return " ok - 1 inject cleared" ;
1261+ }
1262+ return " ok - " + std::to_string (remaining) + " injects cleared" ;
1263+ }
1264+
1265+ /* *
1266+ * @brief Clear a previously configured write error inject.
1267+ * @param o Target object for the error inject.
1268+ * @param type Type of error inject 0 = EIO, 1 = missing shard.
1269+ * @return string Indication of how many errors were cleared.
1270+ */
1271+ std::string ec_inject_clear_write_error (const ghobject_t & o,
1272+ const int64_t type) {
1273+ std::lock_guard<ceph::recursive_mutex> l (ec_inject_lock);
1274+ std::map<ghobject_t ,std::pair<int64_t ,int64_t >> *failures;
1275+ ghobject_t os = o;
1276+ bool no_shard = true ;
1277+ int64_t remaining = 0 ;
1278+ switch (type) {
1279+ case 0 :
1280+ failures = &ec_inject_write_failures0;
1281+ break ;
1282+ case 1 :
1283+ failures = &ec_inject_write_failures1;
1284+ no_shard = false ;
1285+ break ;
1286+ case 2 :
1287+ failures = &ec_inject_write_failures2;
1288+ break ;
1289+ case 3 :
1290+ failures = &ec_inject_write_failures3;
1291+ break ;
1292+ default :
1293+ return " unrecognized error inject type" ;
1294+ }
1295+ if (no_shard) {
1296+ os.set_shard (shard_id_t ::NO_SHARD);
1297+ }
1298+ if (os.hobj .oid .name == " *" ) {
1299+ os.hobj .set_hash (0 );
1300+ }
1301+ auto it = failures->find (os);
1302+ if (it != failures->end ()) {
1303+ remaining = it->second .second ;
1304+ failures->erase (it);
1305+ if (type == 0 ) {
1306+ ec_inject_write_failures0_shard.erase (os);
1307+ }
1308+ }
1309+ if (remaining == 0 ) {
1310+ return " no outstanding error injects" ;
1311+ } else if (remaining == 1 ) {
1312+ return " ok - 1 inject cleared" ;
1313+ }
1314+ return " ok - " + std::to_string (remaining) + " injects cleared" ;
1315+ }
1316+
1317+ static bool ec_inject_test_error (const ghobject_t & o,
1318+ std::map<ghobject_t ,std::pair<int64_t ,int64_t >> *failures)
1319+ {
1320+ std::lock_guard<ceph::recursive_mutex> l (ec_inject_lock);
1321+ auto it = failures->find (o);
1322+ if (it == failures->end ()) {
1323+ ghobject_t os = o;
1324+ os.hobj .oid .name = " *" ;
1325+ os.hobj .set_hash (0 );
1326+ it = failures->find (os);
1327+ }
1328+ if (it != failures->end ()) {
1329+ auto && [when,duration] = it->second ;
1330+ if (when > 0 ) {
1331+ when--;
1332+ return false ;
1333+ }
1334+ if (--duration <= 0 ) {
1335+ failures->erase (it);
1336+ }
1337+ return true ;
1338+ }
1339+ return false ;
1340+ }
1341+
1342+ bool ec_inject_test_read_error0 (const ghobject_t & o)
1343+ {
1344+ return ec_inject_test_error (o, &ec_inject_read_failures0);
1345+ }
1346+
1347+ bool ec_inject_test_read_error1 (const ghobject_t & o)
1348+ {
1349+ return ec_inject_test_error (o, &ec_inject_read_failures1);
1350+ }
1351+
1352+ bool ec_inject_test_write_error0 (const hobject_t & o,
1353+ const osd_reqid_t & reqid) {
1354+ std::lock_guard<ceph::recursive_mutex> l (ec_inject_lock);
1355+ ghobject_t os = ghobject_t (o, ghobject_t ::NO_GEN, shard_id_t ::NO_SHARD);
1356+ if (ec_inject_write_failures0_reqid.count (reqid)) {
1357+ // Matched reqid of retried write - flag for failure
1358+ ec_inject_write_failures0_reqid.erase (reqid);
1359+ return true ;
1360+ }
1361+ auto it = ec_inject_write_failures0.find (os);
1362+ if (it == ec_inject_write_failures0.end ()) {
1363+ os.hobj .oid .name = " *" ;
1364+ os.hobj .set_hash (0 );
1365+ it = ec_inject_write_failures0.find (os);
1366+ }
1367+ if (it != ec_inject_write_failures0.end ()) {
1368+ auto && [when, duration] = it->second ;
1369+ auto shard = ec_inject_write_failures0_shard.find (os)->second ;
1370+ if (when > 0 ) {
1371+ when--;
1372+ } else {
1373+ if (--duration <= 0 ) {
1374+ ec_inject_write_failures0.erase (it);
1375+ ec_inject_write_failures0_shard.erase (os);
1376+ }
1377+ // Error inject triggered - save reqid
1378+ ec_inject_write_failures0_reqid.insert (reqid);
1379+ // Set up error inject to drop message to primary
1380+ ec_inject_write_error (ghobject_t (o, ghobject_t ::NO_GEN, shard), 1 , 0 , 1 );
1381+ }
1382+ }
1383+ return false ;
1384+ }
1385+
1386+ bool ec_inject_test_write_error1 (const ghobject_t & o) {
1387+ bool rc = ec_inject_test_error (o, &ec_inject_write_failures1);
1388+ if (rc) {
1389+ // Set up error inject to generate OSD down
1390+ ec_inject_write_error (o, 2 , 0 , 1 );
1391+ }
1392+ return rc;
1393+ }
1394+
1395+ bool ec_inject_test_write_error2 (const hobject_t & o) {
1396+ return ec_inject_test_error (
1397+ ghobject_t (o, ghobject_t ::NO_GEN, shard_id_t ::NO_SHARD),
1398+ &ec_inject_write_failures2);
1399+ }
1400+
1401+ bool ec_inject_test_write_error3 (const hobject_t & o) {
1402+ return ec_inject_test_error (
1403+ ghobject_t (o, ghobject_t ::NO_GEN, shard_id_t ::NO_SHARD),
1404+ &ec_inject_write_failures3);
1405+ }
0 commit comments