Skip to content

Commit e7e33a6

Browse files
Merge pull request ceph#61263 from aainscow/legacy_ec
OSD: Refactor EC to have two versions: Legacy and New EC
2 parents aed35de + 562aad6 commit e7e33a6

35 files changed

+7971
-640
lines changed

src/crimson/osd/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ add_executable(crimson-osd
4646
${PROJECT_SOURCE_DIR}/src/objclass/class_api.cc
4747
${PROJECT_SOURCE_DIR}/src/osd/ClassHandler.cc
4848
${PROJECT_SOURCE_DIR}/src/osd/ECUtil.cc
49+
${PROJECT_SOURCE_DIR}/src/osd/ECUtilL.cc
4950
${PROJECT_SOURCE_DIR}/src/osd/osd_op_util.cc
5051
${PROJECT_SOURCE_DIR}/src/osd/OSDCap.cc
5152
${PROJECT_SOURCE_DIR}/src/osd/PeeringState.cc

src/osd/CMakeLists.txt

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,6 @@ set(osd_srcs
1717
PGLog.cc
1818
PrimaryLogPG.cc
1919
ReplicatedBackend.cc
20-
ECCommon.cc
21-
ECBackend.cc
22-
ECTransaction.cc
2320
PGBackend.cc
2421
OSDCap.cc
2522
scrubber/pg_scrubber.cc
@@ -36,8 +33,6 @@ set(osd_srcs
3633
Session.cc
3734
SnapMapper.cc
3835
osd_types.cc
39-
ECUtil.cc
40-
ExtentCache.cc
4136
scheduler/OpScheduler.cc
4237
scheduler/OpSchedulerItem.cc
4338
scheduler/mClockScheduler.cc
@@ -46,6 +41,18 @@ set(osd_srcs
4641
recovery_types.cc
4742
MissingLoc.cc
4843
osd_perf_counters.cc
44+
ECCommonL.cc
45+
ECBackendL.cc
46+
ECExtentCacheL.cc
47+
ECTransactionL.cc
48+
ECUtilL.cc
49+
ECCommon.cc
50+
ECBackend.cc
51+
ExtentCache.cc
52+
ECTransaction.cc
53+
ECUtil.cc
54+
ECInject.cc
55+
ECInject.h
4956
${CMAKE_SOURCE_DIR}/src/common/TrackedOp.cc
5057
${CMAKE_SOURCE_DIR}/src/mgr/OSDPerfMetricTypes.cc
5158
${osd_cyg_functions_src}

src/osd/ECBackend.cc

Lines changed: 42 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -16,13 +16,16 @@
1616
#include <sstream>
1717

1818
#include "ECBackend.h"
19+
#include "ECInject.h"
1920
#include "messages/MOSDPGPush.h"
2021
#include "messages/MOSDPGPushReply.h"
2122
#include "messages/MOSDECSubOpWrite.h"
2223
#include "messages/MOSDECSubOpWriteReply.h"
2324
#include "messages/MOSDECSubOpRead.h"
2425
#include "messages/MOSDECSubOpReadReply.h"
2526
#include "ECMsgTypes.h"
27+
#include "ECTypes.h"
28+
#include "ECSwitch.h"
2629

2730
#include "PrimaryLogPG.h"
2831
#include "osd_tracer.h"
@@ -121,16 +124,14 @@ void ECBackend::RecoveryBackend::RecoveryOp::dump(Formatter *f) const
121124

122125
ECBackend::ECBackend(
123126
PGBackend::Listener *pg,
124-
const coll_t &coll,
125-
ObjectStore::CollectionHandle &ch,
126-
ObjectStore *store,
127127
CephContext *cct,
128128
ErasureCodeInterfaceRef ec_impl,
129-
uint64_t stripe_width)
130-
: PGBackend(cct, pg, store, coll, ch),
129+
uint64_t stripe_width,
130+
ECSwitch *s)
131+
: parent(pg), cct(cct), switcher(s),
131132
read_pipeline(cct, ec_impl, this->sinfo, get_parent()->get_eclistener()),
132133
rmw_pipeline(cct, ec_impl, this->sinfo, get_parent()->get_eclistener(), *this),
133-
recovery_backend(cct, this->coll, ec_impl, this->sinfo, read_pipeline, unstable_hashinfo_registry, get_parent(), this),
134+
recovery_backend(cct, switcher->coll, ec_impl, this->sinfo, read_pipeline, unstable_hashinfo_registry, get_parent(), this),
134135
ec_impl(ec_impl),
135136
sinfo(ec_impl, stripe_width),
136137
unstable_hashinfo_registry(cct, ec_impl) {
@@ -195,8 +196,8 @@ struct RecoveryMessages {
195196
const map<pg_shard_t, vector<pair<int, int>>> &need,
196197
bool attrs)
197198
{
198-
list<ECCommon::ec_align_t> to_read;
199-
to_read.emplace_back(ECCommon::ec_align_t{off, len, 0});
199+
list<ec_align_t> to_read;
200+
to_read.emplace_back(ec_align_t{off, len, 0});
200201
ceph_assert(!recovery_reads.count(hoid));
201202
want_to_read.insert(make_pair(hoid, std::move(_want_to_read)));
202203
recovery_reads.insert(
@@ -233,7 +234,7 @@ void ECBackend::handle_recovery_push(
233234
!(get_parent()->pgb_is_primary()) &&
234235
get_parent()->pg_is_remote_backfilling()) {
235236
struct stat st;
236-
int r = store->stat(ch, ghobject_t(op.soid, ghobject_t::NO_GEN,
237+
int r = switcher->store->stat(switcher->ch, ghobject_t(op.soid, ghobject_t::NO_GEN,
237238
get_parent()->whoami_shard().shard), &st);
238239
if (r == 0) {
239240
get_parent()->pg_sub_local_num_bytes(st.st_size);
@@ -463,7 +464,7 @@ struct RecoveryReadCompleter : ECCommon::ReadCompleter {
463464
void finish_single_request(
464465
const hobject_t &hoid,
465466
ECCommon::read_result_t &res,
466-
list<ECCommon::ec_align_t>,
467+
list<ec_align_t>,
467468
set<int> wanted_to_read) override
468469
{
469470
if (!(res.r == 0 && res.errors.empty())) {
@@ -720,13 +721,13 @@ void ECBackend::RecoveryBackend::continue_recovery_op(
720721
}
721722

722723
void ECBackend::run_recovery_op(
723-
RecoveryHandle *_h,
724+
PGBackend::RecoveryHandle *_h,
724725
int priority)
725726
{
726727
ceph_assert(_h);
727728
ECRecoveryHandle &h = static_cast<ECRecoveryHandle&>(*_h);
728729
recovery_backend.run_recovery_op(h, priority);
729-
send_recovery_deletes(priority, h.deletes);
730+
switcher->send_recovery_deletes(priority, h.deletes);
730731
delete _h;
731732
}
732733

@@ -751,7 +752,7 @@ int ECBackend::recover_object(
751752
eversion_t v,
752753
ObjectContextRef head,
753754
ObjectContextRef obc,
754-
RecoveryHandle *_h)
755+
PGBackend::RecoveryHandle *_h)
755756
{
756757
return recovery_backend.recover_object(hoid, v, head, obc, _h);
757758
}
@@ -761,7 +762,7 @@ int ECBackend::RecoveryBackend::recover_object(
761762
eversion_t v,
762763
ObjectContextRef head,
763764
ObjectContextRef obc,
764-
RecoveryHandle *_h)
765+
PGBackend::RecoveryHandle *_h)
765766
{
766767
ECRecoveryHandle *h = static_cast<ECRecoveryHandle*>(_h);
767768
h->ops.push_back(RecoveryOp());
@@ -832,7 +833,7 @@ bool ECBackend::_handle_message(
832833
auto op = _op->get_req<MOSDECSubOpRead>();
833834
MOSDECSubOpReadReply *reply = new MOSDECSubOpReadReply;
834835
reply->pgid = get_parent()->primary_spg_t();
835-
reply->map_epoch = get_osdmap_epoch();
836+
reply->map_epoch = switcher->get_osdmap_epoch();
836837
reply->min_epoch = get_parent()->get_interval_start_epoch();
837838
handle_sub_read(op->op.from, op->op, &(reply->op), _op->pg_trace);
838839
reply->trace = _op->pg_trace;
@@ -918,7 +919,7 @@ void ECBackend::sub_write_committed(
918919
get_parent()->update_last_complete_ondisk(last_complete);
919920
MOSDECSubOpWriteReply *r = new MOSDECSubOpWriteReply;
920921
r->pgid = get_parent()->primary_spg_t();
921-
r->map_epoch = get_osdmap_epoch();
922+
r->map_epoch = switcher->get_osdmap_epoch();
922923
r->min_epoch = get_parent()->get_interval_start_epoch();
923924
r->op.tid = tid;
924925
r->op.last_complete = last_complete;
@@ -929,7 +930,7 @@ void ECBackend::sub_write_committed(
929930
r->trace = trace;
930931
r->trace.event("sending sub op commit");
931932
get_parent()->send_message_osd_cluster(
932-
get_parent()->primary_shard().osd, r, get_osdmap_epoch());
933+
get_parent()->primary_shard().osd, r, switcher->get_osdmap_epoch());
933934
}
934935
}
935936

@@ -946,14 +947,14 @@ void ECBackend::handle_sub_write(
946947
trace.event("handle_sub_write");
947948

948949
if (cct->_conf->bluestore_debug_inject_read_err &&
949-
ec_inject_test_write_error3(op.soid)) {
950+
ECInject::test_write_error3(op.soid)) {
950951
ceph_abort_msg("Error inject - OSD down");
951952
}
952953
if (!get_parent()->pgb_is_primary())
953954
get_parent()->update_stats(op.stats);
954955
ObjectStore::Transaction localt;
955956
if (!op.temp_added.empty()) {
956-
add_temp_objs(op.temp_added);
957+
switcher->add_temp_objs(op.temp_added);
957958
}
958959
if (op.backfill_or_async_recovery) {
959960
for (set<hobject_t>::iterator i = op.temp_removed.begin();
@@ -962,14 +963,14 @@ void ECBackend::handle_sub_write(
962963
dout(10) << __func__ << ": removing object " << *i
963964
<< " since we won't get the transaction" << dendl;
964965
localt.remove(
965-
coll,
966+
switcher->coll,
966967
ghobject_t(
967968
*i,
968969
ghobject_t::NO_GEN,
969970
get_parent()->whoami_shard().shard));
970971
}
971972
}
972-
clear_temp_objs(op.temp_removed);
973+
switcher->clear_temp_objs(op.temp_removed);
973974
dout(30) << __func__ << " missing before " << get_parent()->get_log().get_missing().get_items() << dendl;
974975
// flag set to true during async recovery
975976
bool async = false;
@@ -1033,8 +1034,8 @@ void ECBackend::handle_sub_read(
10331034
(op.subchunks.find(i->first)->second.front().second ==
10341035
ec_impl->get_sub_chunk_count())) {
10351036
dout(20) << __func__ << " case1: reading the complete chunk/shard." << dendl;
1036-
r = store->read(
1037-
ch,
1037+
r = switcher->store->read(
1038+
switcher->ch,
10381039
ghobject_t(i->first, ghobject_t::NO_GEN, shard),
10391040
j->get<0>(),
10401041
j->get<1>(),
@@ -1050,8 +1051,8 @@ void ECBackend::handle_sub_read(
10501051
m += sinfo.get_chunk_size()) {
10511052
for (auto &&k:op.subchunks.find(i->first)->second) {
10521053
bufferlist bl0;
1053-
r = store->read(
1054-
ch,
1054+
r = switcher->store->read(
1055+
switcher->ch,
10551056
ghobject_t(i->first, ghobject_t::NO_GEN, shard),
10561057
j->get<0>() + m + (k.first)*subchunk_size,
10571058
(k.second)*subchunk_size,
@@ -1101,7 +1102,7 @@ void ECBackend::handle_sub_read(
11011102
int r = object_stat(i->first, &st);
11021103
if (r >= 0) {
11031104
dout(10) << __func__ << ": found on disk, size " << st.st_size << dendl;
1104-
r = PGBackend::objects_get_attrs(i->first, &attrs);
1105+
r = switcher->objects_get_attrs_with_hinfo(i->first, &attrs);
11051106
}
11061107
if (r >= 0) {
11071108
hinfo = unstable_hashinfo_registry.get_hash_info(i->first, false, attrs, st.st_size);
@@ -1148,8 +1149,8 @@ void ECBackend::handle_sub_read(
11481149
<< *i << dendl;
11491150
if (reply->errors.count(*i))
11501151
continue;
1151-
int r = store->getattrs(
1152-
ch,
1152+
int r = switcher->store->getattrs(
1153+
switcher->ch,
11531154
ghobject_t(
11541155
*i, ghobject_t::NO_GEN, shard),
11551156
reply->attrs_read[*i]);
@@ -1196,7 +1197,7 @@ void ECBackend::handle_sub_write_reply(
11961197
}
11971198
if (cct->_conf->bluestore_debug_inject_read_err &&
11981199
(i->second->pending_commit.size() == 1) &&
1199-
ec_inject_test_write_error2(i->second->hoid)) {
1200+
ECInject::test_write_error2(i->second->hoid)) {
12001201
std::string cmd =
12011202
"{ \"prefix\": \"osd down\", \"ids\": [\"" + std::to_string( get_parent()->whoami() ) + "\"] }";
12021203
vector<std::string> vcmd{cmd};
@@ -1224,7 +1225,7 @@ void ECBackend::handle_sub_read_reply(
12241225
for (auto i = op.buffers_read.begin();
12251226
i != op.buffers_read.end();
12261227
++i) {
1227-
if (ec_inject_test_read_error0(ghobject_t(i->first, ghobject_t::NO_GEN, op.from.shard))) {
1228+
if (ECInject::test_read_error0(ghobject_t(i->first, ghobject_t::NO_GEN, op.from.shard))) {
12281229
dout(0) << __func__ << " Error inject - EIO error for shard " << op.from.shard << dendl;
12291230
op.buffers_read.erase(i->first);
12301231
op.attrs_read.erase(i->first);
@@ -1482,7 +1483,7 @@ std::tuple<
14821483
return { r, {}, 0 };
14831484
}
14841485
map<string, bufferlist, less<>> real_attrs;
1485-
if (int r = PGBackend::objects_get_attrs(hoid, &real_attrs); r < 0) {
1486+
if (int r = switcher->objects_get_attrs_with_hinfo(hoid, &real_attrs); r < 0) {
14861487
dout(10) << __func__ << ": get attr error " << r << " on" << hoid << dendl;
14871488
return { r, {}, 0 };
14881489
}
@@ -1567,7 +1568,8 @@ int ECBackend::objects_read_sync(
15671568

15681569
void ECBackend::objects_read_async(
15691570
const hobject_t &hoid,
1570-
const list<pair<ECCommon::ec_align_t,
1571+
uint64_t object_size,
1572+
const list<pair<ec_align_t,
15711573
pair<bufferlist*, Context*>>> &to_read,
15721574
Context *on_complete,
15731575
bool fast_read)
@@ -1599,14 +1601,14 @@ void ECBackend::objects_read_async(
15991601
struct cb {
16001602
ECBackend *ec;
16011603
hobject_t hoid;
1602-
list<pair<ECCommon::ec_align_t,
1604+
list<pair<ec_align_t,
16031605
pair<bufferlist*, Context*> > > to_read;
16041606
unique_ptr<Context> on_complete;
16051607
cb(const cb&) = delete;
16061608
cb(cb &&) = default;
16071609
cb(ECBackend *ec,
16081610
const hobject_t &hoid,
1609-
const list<pair<ECCommon::ec_align_t,
1611+
const list<pair<ec_align_t,
16101612
pair<bufferlist*, Context*> > > &to_read,
16111613
Context *on_complete)
16121614
: ec(ec),
@@ -1680,7 +1682,7 @@ void ECBackend::objects_read_async(
16801682

16811683
void ECBackend::objects_read_and_reconstruct(
16821684
const map<hobject_t,
1683-
std::list<ECBackend::ec_align_t>
1685+
std::list<ec_align_t>
16841686
> &reads,
16851687
bool fast_read,
16861688
GenContextURef<ECCommon::ec_extents_t &&> &&func)
@@ -1697,8 +1699,8 @@ int ECBackend::object_stat(
16971699
const hobject_t &hoid,
16981700
struct stat* st)
16991701
{
1700-
int r = store->stat(
1701-
ch,
1702+
int r = switcher->store->stat(
1703+
switcher->ch,
17021704
ghobject_t{hoid, ghobject_t::NO_GEN, get_parent()->whoami_shard().shard},
17031705
st);
17041706
return r;
@@ -1708,11 +1710,6 @@ int ECBackend::objects_get_attrs(
17081710
const hobject_t &hoid,
17091711
map<string, bufferlist, less<>> *out)
17101712
{
1711-
// call from parents -- get raw attrs, without any filtering for hinfo
1712-
int r = PGBackend::objects_get_attrs(hoid, out);
1713-
if (r < 0)
1714-
return r;
1715-
17161713
for (map<string, bufferlist>::iterator i = out->begin();
17171714
i != out->end();
17181715
) {
@@ -1721,20 +1718,7 @@ int ECBackend::objects_get_attrs(
17211718
else
17221719
++i;
17231720
}
1724-
return r;
1725-
}
1726-
1727-
void ECBackend::rollback_append(
1728-
const hobject_t &hoid,
1729-
uint64_t old_size,
1730-
ObjectStore::Transaction *t)
1731-
{
1732-
ceph_assert(old_size % sinfo.get_stripe_width() == 0);
1733-
t->truncate(
1734-
coll,
1735-
ghobject_t(hoid, ghobject_t::NO_GEN, get_parent()->whoami_shard().shard),
1736-
sinfo.aligned_logical_offset_to_chunk_offset(
1737-
old_size));
1721+
return 0;
17381722
}
17391723

17401724
int ECBackend::be_deep_scrub(
@@ -1766,8 +1750,8 @@ int ECBackend::be_deep_scrub(
17661750
stride += sinfo.get_chunk_size() - (stride % sinfo.get_chunk_size());
17671751

17681752
bufferlist bl;
1769-
r = store->read(
1770-
ch,
1753+
r = switcher->store->read(
1754+
switcher->ch,
17711755
ghobject_t(
17721756
poid, ghobject_t::NO_GEN, get_parent()->whoami_shard().shard),
17731757
pos.data_pos,

0 commit comments

Comments
 (0)