1616#include < sstream>
1717
1818#include " ECBackend.h"
19+ #include " ECInject.h"
1920#include " messages/MOSDPGPush.h"
2021#include " messages/MOSDPGPushReply.h"
2122#include " messages/MOSDECSubOpWrite.h"
2223#include " messages/MOSDECSubOpWriteReply.h"
2324#include " messages/MOSDECSubOpRead.h"
2425#include " messages/MOSDECSubOpReadReply.h"
2526#include " ECMsgTypes.h"
27+ #include " ECTypes.h"
28+ #include " ECSwitch.h"
2629
2730#include " PrimaryLogPG.h"
2831#include " osd_tracer.h"
@@ -121,16 +124,14 @@ void ECBackend::RecoveryBackend::RecoveryOp::dump(Formatter *f) const
121124
122125ECBackend::ECBackend (
123126 PGBackend::Listener *pg,
124- const coll_t &coll,
125- ObjectStore::CollectionHandle &ch,
126- ObjectStore *store,
127127 CephContext *cct,
128128 ErasureCodeInterfaceRef ec_impl,
129- uint64_t stripe_width)
130- : PGBackend(cct, pg, store, coll, ch),
129+ uint64_t stripe_width,
130+ ECSwitch *s)
131+ : parent(pg), cct(cct), switcher(s),
131132 read_pipeline(cct, ec_impl, this ->sinfo, get_parent()->get_eclistener()),
132133 rmw_pipeline(cct, ec_impl, this ->sinfo, get_parent()->get_eclistener(), *this),
133- recovery_backend(cct, this ->coll, ec_impl, this ->sinfo, read_pipeline, unstable_hashinfo_registry, get_parent(), this),
134+ recovery_backend(cct, switcher ->coll, ec_impl, this ->sinfo, read_pipeline, unstable_hashinfo_registry, get_parent(), this),
134135 ec_impl(ec_impl),
135136 sinfo(ec_impl, stripe_width),
136137 unstable_hashinfo_registry(cct, ec_impl) {
@@ -195,8 +196,8 @@ struct RecoveryMessages {
195196 const map<pg_shard_t , vector<pair<int , int >>> &need,
196197 bool attrs)
197198 {
198- list<ECCommon:: ec_align_t > to_read;
199- to_read.emplace_back (ECCommon:: ec_align_t {off, len, 0 });
199+ list<ec_align_t > to_read;
200+ to_read.emplace_back (ec_align_t {off, len, 0 });
200201 ceph_assert (!recovery_reads.count (hoid));
201202 want_to_read.insert (make_pair (hoid, std::move (_want_to_read)));
202203 recovery_reads.insert (
@@ -233,7 +234,7 @@ void ECBackend::handle_recovery_push(
233234 !(get_parent ()->pgb_is_primary ()) &&
234235 get_parent ()->pg_is_remote_backfilling ()) {
235236 struct stat st;
236- int r = store->stat (ch, ghobject_t (op.soid , ghobject_t ::NO_GEN,
237+ int r = switcher-> store ->stat (switcher-> ch , ghobject_t (op.soid , ghobject_t ::NO_GEN,
237238 get_parent ()->whoami_shard ().shard ), &st);
238239 if (r == 0 ) {
239240 get_parent ()->pg_sub_local_num_bytes (st.st_size );
@@ -463,7 +464,7 @@ struct RecoveryReadCompleter : ECCommon::ReadCompleter {
463464 void finish_single_request (
464465 const hobject_t &hoid,
465466 ECCommon::read_result_t &res,
466- list<ECCommon:: ec_align_t >,
467+ list<ec_align_t >,
467468 set<int > wanted_to_read) override
468469 {
469470 if (!(res.r == 0 && res.errors .empty ())) {
@@ -720,13 +721,13 @@ void ECBackend::RecoveryBackend::continue_recovery_op(
720721}
721722
722723void ECBackend::run_recovery_op (
723- RecoveryHandle *_h,
724+ PGBackend:: RecoveryHandle *_h,
724725 int priority)
725726{
726727 ceph_assert (_h);
727728 ECRecoveryHandle &h = static_cast <ECRecoveryHandle&>(*_h);
728729 recovery_backend.run_recovery_op (h, priority);
729- send_recovery_deletes (priority, h.deletes );
730+ switcher-> send_recovery_deletes (priority, h.deletes );
730731 delete _h;
731732}
732733
@@ -751,7 +752,7 @@ int ECBackend::recover_object(
751752 eversion_t v,
752753 ObjectContextRef head,
753754 ObjectContextRef obc,
754- RecoveryHandle *_h)
755+ PGBackend:: RecoveryHandle *_h)
755756{
756757 return recovery_backend.recover_object (hoid, v, head, obc, _h);
757758}
@@ -761,7 +762,7 @@ int ECBackend::RecoveryBackend::recover_object(
761762 eversion_t v,
762763 ObjectContextRef head,
763764 ObjectContextRef obc,
764- RecoveryHandle *_h)
765+ PGBackend:: RecoveryHandle *_h)
765766{
766767 ECRecoveryHandle *h = static_cast <ECRecoveryHandle*>(_h);
767768 h->ops .push_back (RecoveryOp ());
@@ -832,7 +833,7 @@ bool ECBackend::_handle_message(
832833 auto op = _op->get_req <MOSDECSubOpRead>();
833834 MOSDECSubOpReadReply *reply = new MOSDECSubOpReadReply;
834835 reply->pgid = get_parent ()->primary_spg_t ();
835- reply->map_epoch = get_osdmap_epoch ();
836+ reply->map_epoch = switcher-> get_osdmap_epoch ();
836837 reply->min_epoch = get_parent ()->get_interval_start_epoch ();
837838 handle_sub_read (op->op .from , op->op , &(reply->op ), _op->pg_trace );
838839 reply->trace = _op->pg_trace ;
@@ -918,7 +919,7 @@ void ECBackend::sub_write_committed(
918919 get_parent ()->update_last_complete_ondisk (last_complete);
919920 MOSDECSubOpWriteReply *r = new MOSDECSubOpWriteReply;
920921 r->pgid = get_parent ()->primary_spg_t ();
921- r->map_epoch = get_osdmap_epoch ();
922+ r->map_epoch = switcher-> get_osdmap_epoch ();
922923 r->min_epoch = get_parent ()->get_interval_start_epoch ();
923924 r->op .tid = tid;
924925 r->op .last_complete = last_complete;
@@ -929,7 +930,7 @@ void ECBackend::sub_write_committed(
929930 r->trace = trace;
930931 r->trace .event (" sending sub op commit" );
931932 get_parent ()->send_message_osd_cluster (
932- get_parent ()->primary_shard ().osd , r, get_osdmap_epoch ());
933+ get_parent ()->primary_shard ().osd , r, switcher-> get_osdmap_epoch ());
933934 }
934935}
935936
@@ -946,14 +947,14 @@ void ECBackend::handle_sub_write(
946947 trace.event (" handle_sub_write" );
947948
948949 if (cct->_conf ->bluestore_debug_inject_read_err &&
949- ec_inject_test_write_error3 (op.soid )) {
950+ ECInject::test_write_error3 (op.soid )) {
950951 ceph_abort_msg (" Error inject - OSD down" );
951952 }
952953 if (!get_parent ()->pgb_is_primary ())
953954 get_parent ()->update_stats (op.stats );
954955 ObjectStore::Transaction localt;
955956 if (!op.temp_added .empty ()) {
956- add_temp_objs (op.temp_added );
957+ switcher-> add_temp_objs (op.temp_added );
957958 }
958959 if (op.backfill_or_async_recovery ) {
959960 for (set<hobject_t >::iterator i = op.temp_removed .begin ();
@@ -962,14 +963,14 @@ void ECBackend::handle_sub_write(
962963 dout (10 ) << __func__ << " : removing object " << *i
963964 << " since we won't get the transaction" << dendl;
964965 localt.remove (
965- coll,
966+ switcher-> coll ,
966967 ghobject_t (
967968 *i,
968969 ghobject_t ::NO_GEN,
969970 get_parent ()->whoami_shard ().shard ));
970971 }
971972 }
972- clear_temp_objs (op.temp_removed );
973+ switcher-> clear_temp_objs (op.temp_removed );
973974 dout (30 ) << __func__ << " missing before " << get_parent ()->get_log ().get_missing ().get_items () << dendl;
974975 // flag set to true during async recovery
975976 bool async = false ;
@@ -1033,8 +1034,8 @@ void ECBackend::handle_sub_read(
10331034 (op.subchunks .find (i->first )->second .front ().second ==
10341035 ec_impl->get_sub_chunk_count ())) {
10351036 dout (20 ) << __func__ << " case1: reading the complete chunk/shard." << dendl;
1036- r = store->read (
1037- ch,
1037+ r = switcher-> store ->read (
1038+ switcher-> ch ,
10381039 ghobject_t (i->first , ghobject_t ::NO_GEN, shard),
10391040 j->get <0 >(),
10401041 j->get <1 >(),
@@ -1050,8 +1051,8 @@ void ECBackend::handle_sub_read(
10501051 m += sinfo.get_chunk_size ()) {
10511052 for (auto &&k:op.subchunks .find (i->first )->second ) {
10521053 bufferlist bl0;
1053- r = store->read (
1054- ch,
1054+ r = switcher-> store ->read (
1055+ switcher-> ch ,
10551056 ghobject_t (i->first , ghobject_t ::NO_GEN, shard),
10561057 j->get <0 >() + m + (k.first )*subchunk_size,
10571058 (k.second )*subchunk_size,
@@ -1101,7 +1102,7 @@ void ECBackend::handle_sub_read(
11011102 int r = object_stat (i->first , &st);
11021103 if (r >= 0 ) {
11031104 dout (10 ) << __func__ << " : found on disk, size " << st.st_size << dendl;
1104- r = PGBackend::objects_get_attrs (i->first , &attrs);
1105+ r = switcher-> objects_get_attrs_with_hinfo (i->first , &attrs);
11051106 }
11061107 if (r >= 0 ) {
11071108 hinfo = unstable_hashinfo_registry.get_hash_info (i->first , false , attrs, st.st_size );
@@ -1148,8 +1149,8 @@ void ECBackend::handle_sub_read(
11481149 << *i << dendl;
11491150 if (reply->errors .count (*i))
11501151 continue ;
1151- int r = store->getattrs (
1152- ch,
1152+ int r = switcher-> store ->getattrs (
1153+ switcher-> ch ,
11531154 ghobject_t (
11541155 *i, ghobject_t ::NO_GEN, shard),
11551156 reply->attrs_read [*i]);
@@ -1196,7 +1197,7 @@ void ECBackend::handle_sub_write_reply(
11961197 }
11971198 if (cct->_conf ->bluestore_debug_inject_read_err &&
11981199 (i->second ->pending_commit .size () == 1 ) &&
1199- ec_inject_test_write_error2 (i->second ->hoid )) {
1200+ ECInject::test_write_error2 (i->second ->hoid )) {
12001201 std::string cmd =
12011202 " { \" prefix\" : \" osd down\" , \" ids\" : [\" " + std::to_string ( get_parent ()->whoami () ) + " \" ] }" ;
12021203 vector<std::string> vcmd{cmd};
@@ -1224,7 +1225,7 @@ void ECBackend::handle_sub_read_reply(
12241225 for (auto i = op.buffers_read .begin ();
12251226 i != op.buffers_read .end ();
12261227 ++i) {
1227- if (ec_inject_test_read_error0 (ghobject_t (i->first , ghobject_t ::NO_GEN, op.from .shard ))) {
1228+ if (ECInject::test_read_error0 (ghobject_t (i->first , ghobject_t ::NO_GEN, op.from .shard ))) {
12281229 dout (0 ) << __func__ << " Error inject - EIO error for shard " << op.from .shard << dendl;
12291230 op.buffers_read .erase (i->first );
12301231 op.attrs_read .erase (i->first );
@@ -1482,7 +1483,7 @@ std::tuple<
14821483 return { r, {}, 0 };
14831484 }
14841485 map<string, bufferlist, less<>> real_attrs;
1485- if (int r = PGBackend::objects_get_attrs (hoid, &real_attrs); r < 0 ) {
1486+ if (int r = switcher-> objects_get_attrs_with_hinfo (hoid, &real_attrs); r < 0 ) {
14861487 dout (10 ) << __func__ << " : get attr error " << r << " on" << hoid << dendl;
14871488 return { r, {}, 0 };
14881489 }
@@ -1567,7 +1568,8 @@ int ECBackend::objects_read_sync(
15671568
15681569void ECBackend::objects_read_async (
15691570 const hobject_t &hoid,
1570- const list<pair<ECCommon::ec_align_t ,
1571+ uint64_t object_size,
1572+ const list<pair<ec_align_t ,
15711573 pair<bufferlist*, Context*>>> &to_read,
15721574 Context *on_complete,
15731575 bool fast_read)
@@ -1599,14 +1601,14 @@ void ECBackend::objects_read_async(
15991601 struct cb {
16001602 ECBackend *ec;
16011603 hobject_t hoid;
1602- list<pair<ECCommon:: ec_align_t ,
1604+ list<pair<ec_align_t ,
16031605 pair<bufferlist*, Context*> > > to_read;
16041606 unique_ptr<Context> on_complete;
16051607 cb (const cb&) = delete ;
16061608 cb (cb &&) = default ;
16071609 cb (ECBackend *ec,
16081610 const hobject_t &hoid,
1609- const list<pair<ECCommon:: ec_align_t ,
1611+ const list<pair<ec_align_t ,
16101612 pair<bufferlist*, Context*> > > &to_read,
16111613 Context *on_complete)
16121614 : ec(ec),
@@ -1680,7 +1682,7 @@ void ECBackend::objects_read_async(
16801682
16811683void ECBackend::objects_read_and_reconstruct (
16821684 const map<hobject_t ,
1683- std::list<ECBackend:: ec_align_t >
1685+ std::list<ec_align_t >
16841686 > &reads,
16851687 bool fast_read,
16861688 GenContextURef<ECCommon::ec_extents_t &&> &&func)
@@ -1697,8 +1699,8 @@ int ECBackend::object_stat(
16971699 const hobject_t &hoid,
16981700 struct stat * st)
16991701{
1700- int r = store->stat (
1701- ch,
1702+ int r = switcher-> store ->stat (
1703+ switcher-> ch ,
17021704 ghobject_t {hoid, ghobject_t ::NO_GEN, get_parent ()->whoami_shard ().shard },
17031705 st);
17041706 return r;
@@ -1708,11 +1710,6 @@ int ECBackend::objects_get_attrs(
17081710 const hobject_t &hoid,
17091711 map<string, bufferlist, less<>> *out)
17101712{
1711- // call from parents -- get raw attrs, without any filtering for hinfo
1712- int r = PGBackend::objects_get_attrs (hoid, out);
1713- if (r < 0 )
1714- return r;
1715-
17161713 for (map<string, bufferlist>::iterator i = out->begin ();
17171714 i != out->end ();
17181715 ) {
@@ -1721,20 +1718,7 @@ int ECBackend::objects_get_attrs(
17211718 else
17221719 ++i;
17231720 }
1724- return r;
1725- }
1726-
1727- void ECBackend::rollback_append (
1728- const hobject_t &hoid,
1729- uint64_t old_size,
1730- ObjectStore::Transaction *t)
1731- {
1732- ceph_assert (old_size % sinfo.get_stripe_width () == 0 );
1733- t->truncate (
1734- coll,
1735- ghobject_t (hoid, ghobject_t ::NO_GEN, get_parent ()->whoami_shard ().shard ),
1736- sinfo.aligned_logical_offset_to_chunk_offset (
1737- old_size));
1721+ return 0 ;
17381722}
17391723
17401724int ECBackend::be_deep_scrub (
@@ -1766,8 +1750,8 @@ int ECBackend::be_deep_scrub(
17661750 stride += sinfo.get_chunk_size () - (stride % sinfo.get_chunk_size ());
17671751
17681752 bufferlist bl;
1769- r = store->read (
1770- ch,
1753+ r = switcher-> store ->read (
1754+ switcher-> ch ,
17711755 ghobject_t (
17721756 poid, ghobject_t ::NO_GEN, get_parent ()->whoami_shard ().shard ),
17731757 pos.data_pos ,
0 commit comments