|
16 | 16 | #include <sstream> |
17 | 17 |
|
18 | 18 | #include "ECCommonL.h" |
| 19 | +#include "ECInject.h" |
19 | 20 | #include "messages/MOSDPGPush.h" |
20 | 21 | #include "messages/MOSDPGPushReply.h" |
21 | 22 | #include "messages/MOSDECSubOpWrite.h" |
@@ -223,7 +224,7 @@ void ECCommonL::ReadPipeline::get_all_avail_shards( |
223 | 224 | continue; |
224 | 225 | } |
225 | 226 | if (cct->_conf->bluestore_debug_inject_read_err && |
226 | | - ec_inject_test_read_error1(ghobject_t(hoid, ghobject_t::NO_GEN, i->shard))) { |
| 227 | + ECInject::test_read_error1(ghobject_t(hoid, ghobject_t::NO_GEN, i->shard))) { |
227 | 228 | dout(0) << __func__ << " Error inject - Missing shard " << i->shard << dendl; |
228 | 229 | continue; |
229 | 230 | } |
@@ -918,7 +919,7 @@ bool ECCommonL::RMWPipeline::try_reads_to_commit() |
918 | 919 | should_write_local = true; |
919 | 920 | local_write_op.claim(sop); |
920 | 921 | } else if (cct->_conf->bluestore_debug_inject_read_err && |
921 | | - ec_inject_test_write_error1(ghobject_t(op->hoid, |
| 922 | + ECInject::test_write_error1(ghobject_t(op->hoid, |
922 | 923 | ghobject_t::NO_GEN, i->shard))) { |
923 | 924 | dout(0) << " Error inject - Dropping write message to shard " << |
924 | 925 | i->shard << dendl; |
@@ -1100,305 +1101,3 @@ ECUtilL::HashInfoRef ECCommonL::UnstableHashInfoRegistry::get_hash_info( |
1100 | 1101 | } |
1101 | 1102 | return ref; |
1102 | 1103 | } |
1103 | | - |
1104 | | -// Error inject interfaces |
1105 | | -static ceph::recursive_mutex ec_inject_lock = |
1106 | | - ceph::make_recursive_mutex("ECCommon::ec_inject_lock"); |
1107 | | -static std::map<ghobject_t,std::pair<int64_t,int64_t>> ec_inject_read_failures0; |
1108 | | -static std::map<ghobject_t,std::pair<int64_t,int64_t>> ec_inject_read_failures1; |
1109 | | -static std::map<ghobject_t,std::pair<int64_t,int64_t>> ec_inject_write_failures0; |
1110 | | -static std::map<ghobject_t,std::pair<int64_t,int64_t>> ec_inject_write_failures1; |
1111 | | -static std::map<ghobject_t,std::pair<int64_t,int64_t>> ec_inject_write_failures2; |
1112 | | -static std::map<ghobject_t,std::pair<int64_t,int64_t>> ec_inject_write_failures3; |
1113 | | -static std::map<ghobject_t,shard_id_t> ec_inject_write_failures0_shard; |
1114 | | -static std::set<osd_reqid_t> ec_inject_write_failures0_reqid; |
1115 | | - |
1116 | | -/** |
1117 | | - * Configure a read error inject that typically forces additional reads of |
1118 | | - * shards in an EC pool to recover data using the redundancy. With multiple |
1119 | | - * errors it is possible to force client reads to fail. |
1120 | | - * |
1121 | | - * Type 0 - Simulate a medium error. Fail a read with -EIO to force |
1122 | | - * additional reads and a decode |
1123 | | - * |
1124 | | - * Type 1 - Simulate a missing OSD. Don't even try to read a shard |
1125 | | - * |
1126 | | - * @brief Set up a read error inject for an object in an EC pool. |
1127 | | - * @param o Target object for the error inject. |
1128 | | - * @param when Error inject starts after this many object store reads. |
1129 | | - * @param duration Error inject affects this many object store reads. |
1130 | | - * @param type Type of error inject 0 = EIO, 1 = missing shard. |
1131 | | - * @return string Result of configuring the error inject. |
1132 | | - */ |
1133 | | -std::string ec_inject_read_error(const ghobject_t& o, |
1134 | | - const int64_t type, |
1135 | | - const int64_t when, |
1136 | | - const int64_t duration) { |
1137 | | - std::lock_guard<ceph::recursive_mutex> l(ec_inject_lock); |
1138 | | - ghobject_t os = o; |
1139 | | - if (os.hobj.oid.name == "*") { |
1140 | | - os.hobj.set_hash(0); |
1141 | | - } |
1142 | | - switch (type) { |
1143 | | - case 0: |
1144 | | - ec_inject_read_failures0[os] = std::pair(when, duration); |
1145 | | - return "ok - read returns EIO"; |
1146 | | - case 1: |
1147 | | - ec_inject_read_failures1[os] = std::pair(when, duration); |
1148 | | - return "ok - read pretends shard is missing"; |
1149 | | - default: |
1150 | | - break; |
1151 | | - } |
1152 | | - return "unrecognized error inject type"; |
1153 | | -} |
1154 | | - |
1155 | | -/** |
1156 | | - * Configure a write error inject that either fails an OSD or causes a |
1157 | | - * client write operation to be rolled back. |
1158 | | - * |
1159 | | - * Type 0 - Tests rollback. Drop a write I/O to a shard, then simulate an OSD |
1160 | | - * down to force rollback to occur, lastly fail the retried write from the |
1161 | | - * client so the results of the rollback can be inspected. |
1162 | | - * |
1163 | | - * Type 1 - Drop a write I/O to a shard. Used on its own this will hang a |
1164 | | - * write I/O. |
1165 | | - * |
1166 | | - * Type 2 - Simulate an OSD down (ceph osd down) to force a new epoch. Usually |
1167 | | - * used together with type 1 to force a rollback |
1168 | | - * |
1169 | | - * Type 3 - Abort when an OSD processes a write I/O to a shard. Typically the |
1170 | | - * client write will be committed while the OSD is absent which will result in |
1171 | | - * recovery or backfill later when the OSD returns. |
1172 | | - * |
1173 | | - * @brief Set up a write error inject for an object in an EC pool. |
1174 | | - * @param o Target object for the error inject. |
1175 | | - * @param when Error inject starts after this many matching write I/Os. |
1176 | | - * @param duration Error inject affects this many write I/Os. |
1177 | | - * @param type Type of error inject 0 = rollback test, 1 = drop write, 2 = OSD down, 3 = abort OSD. |
1178 | | - * @return string Result of configuring the error inject. |
1179 | | - */ |
1180 | | -std::string ec_inject_write_error(const ghobject_t& o, |
1181 | | - const int64_t type, |
1182 | | - const int64_t when, |
1183 | | - const int64_t duration) { |
1184 | | - std::lock_guard<ceph::recursive_mutex> l(ec_inject_lock); |
1185 | | - std::map<ghobject_t,std::pair<int64_t,int64_t>> *failures; |
1186 | | - ghobject_t os = o; |
1187 | | - bool no_shard = true; |
1188 | | - std::string result; |
1189 | | - switch (type) { |
1190 | | - case 0: |
1191 | | - failures = &ec_inject_write_failures0; |
1192 | | - result = "ok - drop write, sim OSD down and fail client retry with EINVAL"; |
1193 | | - break; |
1194 | | - case 1: |
1195 | | - failures = &ec_inject_write_failures1; |
1196 | | - no_shard = false; |
1197 | | - result = "ok - drop write to shard"; |
1198 | | - break; |
1199 | | - case 2: |
1200 | | - failures = &ec_inject_write_failures2; |
1201 | | - result = "ok - inject OSD down"; |
1202 | | - break; |
1203 | | - case 3: |
1204 | | - if (duration != 1) { |
1205 | | - return "duration must be 1"; |
1206 | | - } |
1207 | | - failures = &ec_inject_write_failures3; |
1208 | | - result = "ok - write abort OSDs"; |
1209 | | - break; |
1210 | | - default: |
1211 | | - return "unrecognized error inject type"; |
1212 | | - } |
1213 | | - if (no_shard) { |
1214 | | - os.set_shard(shard_id_t::NO_SHARD); |
1215 | | - } |
1216 | | - if (os.hobj.oid.name == "*") { |
1217 | | - os.hobj.set_hash(0); |
1218 | | - } |
1219 | | - (*failures)[os] = std::pair(when, duration); |
1220 | | - if (type == 0) { |
1221 | | - ec_inject_write_failures0_shard[os] = o.shard_id; |
1222 | | - } |
1223 | | - return result; |
1224 | | -} |
1225 | | - |
1226 | | -/** |
1227 | | - * @brief Clear a previously configured read error inject. |
1228 | | - * @param o Target object for the error inject. |
1229 | | - * @param type Type of error inject 0 = EIO, 1 = missing shard. |
1230 | | - * @return string Indication of how many errors were cleared. |
1231 | | - */ |
1232 | | -std::string ec_inject_clear_read_error(const ghobject_t& o, |
1233 | | - const int64_t type) { |
1234 | | - std::lock_guard<ceph::recursive_mutex> l(ec_inject_lock); |
1235 | | - std::map<ghobject_t,std::pair<int64_t,int64_t>> *failures; |
1236 | | - ghobject_t os = o; |
1237 | | - int64_t remaining = 0; |
1238 | | - switch (type) { |
1239 | | - case 0: |
1240 | | - failures = &ec_inject_read_failures0; |
1241 | | - break; |
1242 | | - case 1: |
1243 | | - failures = &ec_inject_read_failures1; |
1244 | | - break; |
1245 | | - default: |
1246 | | - return "unrecognized error inject type"; |
1247 | | - } |
1248 | | - if (os.hobj.oid.name == "*") { |
1249 | | - os.hobj.set_hash(0); |
1250 | | - } |
1251 | | - auto it = failures->find(os); |
1252 | | - if (it != failures->end()) { |
1253 | | - remaining = it->second.second; |
1254 | | - failures->erase(it); |
1255 | | - } |
1256 | | - if (remaining == 0) { |
1257 | | - return "no outstanding error injects"; |
1258 | | - } else if (remaining == 1) { |
1259 | | - return "ok - 1 inject cleared"; |
1260 | | - } |
1261 | | - return "ok - " + std::to_string(remaining) + " injects cleared"; |
1262 | | -} |
1263 | | - |
1264 | | -/** |
1265 | | - * @brief Clear a previously configured write error inject. |
1266 | | - * @param o Target object for the error inject. |
1267 | | - * @param type Type of error inject 0 = rollback test, 1 = drop write, 2 = OSD down, 3 = abort OSD. |
1268 | | - * @return string Indication of how many errors were cleared. |
1269 | | - */ |
1270 | | -std::string ec_inject_clear_write_error(const ghobject_t& o, |
1271 | | - const int64_t type) { |
1272 | | - std::lock_guard<ceph::recursive_mutex> l(ec_inject_lock); |
1273 | | - std::map<ghobject_t,std::pair<int64_t,int64_t>> *failures; |
1274 | | - ghobject_t os = o; |
1275 | | - bool no_shard = true; |
1276 | | - int64_t remaining = 0; |
1277 | | - switch (type) { |
1278 | | - case 0: |
1279 | | - failures = &ec_inject_write_failures0; |
1280 | | - break; |
1281 | | - case 1: |
1282 | | - failures = &ec_inject_write_failures1; |
1283 | | - no_shard = false; |
1284 | | - break; |
1285 | | - case 2: |
1286 | | - failures = &ec_inject_write_failures2; |
1287 | | - break; |
1288 | | - case 3: |
1289 | | - failures = &ec_inject_write_failures3; |
1290 | | - break; |
1291 | | - default: |
1292 | | - return "unrecognized error inject type"; |
1293 | | - } |
1294 | | - if (no_shard) { |
1295 | | - os.set_shard(shard_id_t::NO_SHARD); |
1296 | | - } |
1297 | | - if (os.hobj.oid.name == "*") { |
1298 | | - os.hobj.set_hash(0); |
1299 | | - } |
1300 | | - auto it = failures->find(os); |
1301 | | - if (it != failures->end()) { |
1302 | | - remaining = it->second.second; |
1303 | | - failures->erase(it); |
1304 | | - if (type == 0) { |
1305 | | - ec_inject_write_failures0_shard.erase(os); |
1306 | | - } |
1307 | | - } |
1308 | | - if (remaining == 0) { |
1309 | | - return "no outstanding error injects"; |
1310 | | - } else if (remaining == 1) { |
1311 | | - return "ok - 1 inject cleared"; |
1312 | | - } |
1313 | | - return "ok - " + std::to_string(remaining) + " injects cleared"; |
1314 | | -} |
1315 | | - |
1316 | | -static bool ec_inject_test_error(const ghobject_t& o, |
1317 | | - std::map<ghobject_t,std::pair<int64_t,int64_t>> *failures) |
1318 | | -{ |
1319 | | - std::lock_guard<ceph::recursive_mutex> l(ec_inject_lock); |
1320 | | - auto it = failures->find(o); |
1321 | | - if (it == failures->end()) { |
1322 | | - ghobject_t os = o; |
1323 | | - os.hobj.oid.name = "*"; |
1324 | | - os.hobj.set_hash(0); |
1325 | | - it = failures->find(os); |
1326 | | - } |
1327 | | - if (it != failures->end()) { |
1328 | | - auto && [when,duration] = it->second; |
1329 | | - if (when > 0) { |
1330 | | - when--; |
1331 | | - return false; |
1332 | | - } |
1333 | | - if (--duration <= 0) { |
1334 | | - failures->erase(it); |
1335 | | - } |
1336 | | - return true; |
1337 | | - } |
1338 | | - return false; |
1339 | | -} |
1340 | | - |
1341 | | -bool ec_inject_test_read_error0(const ghobject_t& o) |
1342 | | -{ |
1343 | | - return ec_inject_test_error(o, &ec_inject_read_failures0); |
1344 | | -} |
1345 | | - |
1346 | | -bool ec_inject_test_read_error1(const ghobject_t& o) |
1347 | | -{ |
1348 | | - return ec_inject_test_error(o, &ec_inject_read_failures1); |
1349 | | -} |
1350 | | - |
1351 | | -bool ec_inject_test_write_error0(const hobject_t& o, |
1352 | | - const osd_reqid_t& reqid) { |
1353 | | - std::lock_guard<ceph::recursive_mutex> l(ec_inject_lock); |
1354 | | - ghobject_t os = ghobject_t(o, ghobject_t::NO_GEN, shard_id_t::NO_SHARD); |
1355 | | - if (ec_inject_write_failures0_reqid.count(reqid)) { |
1356 | | - // Matched reqid of retried write - flag for failure |
1357 | | - ec_inject_write_failures0_reqid.erase(reqid); |
1358 | | - return true; |
1359 | | - } |
1360 | | - auto it = ec_inject_write_failures0.find(os); |
1361 | | - if (it == ec_inject_write_failures0.end()) { |
1362 | | - os.hobj.oid.name = "*"; |
1363 | | - os.hobj.set_hash(0); |
1364 | | - it = ec_inject_write_failures0.find(os); |
1365 | | - } |
1366 | | - if (it != ec_inject_write_failures0.end()) { |
1367 | | - auto && [when, duration] = it->second; |
1368 | | - auto shard = ec_inject_write_failures0_shard.find(os)->second; |
1369 | | - if (when > 0) { |
1370 | | - when--; |
1371 | | - } else { |
1372 | | - if (--duration <= 0) { |
1373 | | - ec_inject_write_failures0.erase(it); |
1374 | | - ec_inject_write_failures0_shard.erase(os); |
1375 | | - } |
1376 | | - // Error inject triggered - save reqid |
1377 | | - ec_inject_write_failures0_reqid.insert(reqid); |
1378 | | - // Set up error inject to drop message to primary |
1379 | | - ec_inject_write_error(ghobject_t(o, ghobject_t::NO_GEN, shard), 1, 0, 1); |
1380 | | - } |
1381 | | - } |
1382 | | - return false; |
1383 | | -} |
1384 | | - |
1385 | | -bool ec_inject_test_write_error1(const ghobject_t& o) { |
1386 | | - bool rc = ec_inject_test_error(o, &ec_inject_write_failures1); |
1387 | | - if (rc) { |
1388 | | - // Set up error inject to generate OSD down |
1389 | | - ec_inject_write_error(o, 2, 0, 1); |
1390 | | - } |
1391 | | - return rc; |
1392 | | -} |
1393 | | - |
1394 | | -bool ec_inject_test_write_error2(const hobject_t& o) { |
1395 | | - return ec_inject_test_error( |
1396 | | - ghobject_t(o, ghobject_t::NO_GEN, shard_id_t::NO_SHARD), |
1397 | | - &ec_inject_write_failures2); |
1398 | | -} |
1399 | | - |
1400 | | -bool ec_inject_test_write_error3(const hobject_t& o) { |
1401 | | - return ec_inject_test_error( |
1402 | | - ghobject_t(o, ghobject_t::NO_GEN, shard_id_t::NO_SHARD), |
1403 | | - &ec_inject_write_failures3); |
1404 | | -} |
0 commit comments