Skip to content

Commit ae4da72

Browse files
committed
osd: Remove all references to hinfo from optimized EC
Legacy EC used hinfo to store two things: 1. Shard size 2. CRCs of the shards However: * Optimized EC stores different object sizes on each shard * Optimized EC scrub calculates the correct sizes of shards and checks them, so shard size checks are not needed in hinfo. * Bluestore checks the CRC. * Seastore checks the CRC. As such, the hinfo object is redundant. As such we remove it in optimized EC: 1. Remove all references/upgrades to hinfo. 2. Delete hinfo attribute if found on recovery/backfill. 3. Redirect all scrub references for hinfo to legacy EC. Signed-off-by: Alex Ainscow <[email protected]>
1 parent 25803ec commit ae4da72

File tree

18 files changed

+52
-505
lines changed

18 files changed

+52
-505
lines changed

src/crimson/osd/scrub/scrub_validator.cc

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
#include "crimson/common/log.h"
99
#include "crimson/osd/scrub/scrub_validator.h"
1010
#include "osd/ECUtil.h"
11+
#include "osd/ECUtilL.h"
1112

1213
SET_SUBSYS(osd);
1314

@@ -31,7 +32,7 @@ struct shard_evaluation_t {
3132

3233
std::optional<object_info_t> object_info;
3334
std::optional<SnapSet> snapset;
34-
std::optional<ECUtil::HashInfo> hinfo;
35+
std::optional<ECLegacy::ECUtilL::HashInfo> hinfo;
3536

3637
size_t omap_keys{0};
3738
size_t omap_bytes{0};
@@ -132,11 +133,11 @@ shard_evaluation_t evaluate_object_shard(
132133
}
133134

134135
if (policy.is_ec()) {
135-
auto xiter = obj.attrs.find(ECUtil::get_hinfo_key());
136+
auto xiter = obj.attrs.find(ECLegacy::ECUtilL::get_hinfo_key());
136137
if (xiter == obj.attrs.end()) {
137138
ret.shard_info.set_hinfo_missing();
138139
} else {
139-
ret.hinfo = ECUtil::HashInfo{};
140+
ret.hinfo = ECLegacy::ECUtilL::HashInfo{};
140141
try {
141142
auto bliter = xiter->second.cbegin();
142143
decode(*(ret.hinfo), bliter);
@@ -210,10 +211,10 @@ librados::obj_err_t compare_candidate_to_authoritative(
210211
}
211212

212213
if (policy.is_ec()) {
213-
auto aiter = auth_si.attrs.find(ECUtil::get_hinfo_key());
214+
auto aiter = auth_si.attrs.find(ECLegacy::ECUtilL::get_hinfo_key());
214215
ceph_assert(aiter != auth_si.attrs.end());
215216

216-
auto citer = cand_si.attrs.find(ECUtil::get_hinfo_key());
217+
auto citer = cand_si.attrs.find(ECLegacy::ECUtilL::get_hinfo_key());
217218
if (citer == cand_si.attrs.end() ||
218219
!aiter->second.contents_equal(citer->second)) {
219220
ret.errors |= obj_err_t::HINFO_INCONSISTENCY;
@@ -226,7 +227,7 @@ librados::obj_err_t compare_candidate_to_authoritative(
226227

227228
auto is_sys_attr = [&policy](const auto &str) {
228229
return str == OI_ATTR || str == SS_ATTR ||
229-
(policy.is_ec() && str == ECUtil::get_hinfo_key());
230+
(policy.is_ec() && str == ECLegacy::ECUtilL::get_hinfo_key());
230231
};
231232
for (auto aiter = auth_si.attrs.begin(); aiter != auth_si.attrs.end(); ++aiter) {
232233
if (is_sys_attr(aiter->first)) continue;

src/erasure-code/consistency/ECEncoder.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -106,7 +106,7 @@ std::optional<ceph::bufferlist> ECEncoder<stripe_info_o_t>::do_encode(ceph::buff
106106

107107
sinfo.ro_range_to_shard_extent_map(0, inbl.length(), inbl, encoded_data);
108108
encoded_data.insert_parity_buffers();
109-
int r = encoded_data.encode(ec_impl, nullptr, encoded_data.get_ro_end());
109+
int r = encoded_data.encode(ec_impl);
110110
if (r < 0) {
111111
std::cerr << "Failed to encode: " << cpp_strerror(r) << std::endl;
112112
return {};

src/osd/ECBackend.cc

Lines changed: 12 additions & 155 deletions
Original file line numberDiff line numberDiff line change
@@ -90,10 +90,9 @@ ECBackend::ECBackend(
9090
rmw_pipeline(cct, ec_impl, this->sinfo, get_parent()->get_eclistener(),
9191
*this, ec_extent_cache_lru),
9292
recovery_backend(cct, switcher->coll, ec_impl, this->sinfo, read_pipeline,
93-
unstable_hashinfo_registry, get_parent(), this),
93+
get_parent(), this),
9494
ec_impl(ec_impl),
95-
sinfo(ec_impl, &(get_parent()->get_pool()), stripe_width),
96-
unstable_hashinfo_registry(cct, ec_impl) {
95+
sinfo(ec_impl, &(get_parent()->get_pool()), stripe_width) {
9796

9897
/* EC makes some assumptions about how the plugin organises the *data* shards:
9998
* - The chunk size is constant for a particular profile.
@@ -113,15 +112,13 @@ ECBackend::RecoveryBackend::RecoveryBackend(
113112
ceph::ErasureCodeInterfaceRef ec_impl,
114113
const ECUtil::stripe_info_t &sinfo,
115114
ReadPipeline &read_pipeline,
116-
UnstableHashInfoRegistry &unstable_hashinfo_registry,
117115
ECListener *parent,
118116
ECBackend *ecbackend)
119117
: cct(cct),
120118
coll(coll),
121119
ec_impl(std::move(ec_impl)),
122120
sinfo(sinfo),
123121
read_pipeline(read_pipeline),
124-
unstable_hashinfo_registry(unstable_hashinfo_registry),
125122
parent(parent),
126123
ecbackend(ecbackend) {}
127124

@@ -335,17 +332,6 @@ void ECBackend::RecoveryBackend::handle_recovery_read_complete(
335332
op.recovery_info.size = op.obc->obs.oi.size;
336333
op.recovery_info.oi = op.obc->obs.oi;
337334
}
338-
339-
if (sinfo.get_is_hinfo_required()) {
340-
ECUtil::HashInfo hinfo(sinfo.get_k_plus_m());
341-
if (op.obc->obs.oi.size > 0) {
342-
ceph_assert(op.xattrs.count(ECUtil::get_hinfo_key()));
343-
auto bp = op.xattrs[ECUtil::get_hinfo_key()].cbegin();
344-
decode(hinfo, bp);
345-
}
346-
op.hinfo = unstable_hashinfo_registry.maybe_put_hash_info(
347-
hoid, std::move(hinfo));
348-
}
349335
}
350336
ceph_assert(op.xattrs.size());
351337
ceph_assert(op.obc);
@@ -561,29 +547,6 @@ void ECBackend::RecoveryBackend::continue_recovery_op(
561547

562548
if (op.recovery_progress.first && op.obc) {
563549
op.xattrs = op.obc->attr_cache;
564-
if (sinfo.get_is_hinfo_required()) {
565-
if (auto [r, attrs, size] = ecbackend->get_attrs_n_size_from_disk(
566-
op.hoid);
567-
r >= 0 || r == -ENOENT) {
568-
op.hinfo = unstable_hashinfo_registry.get_hash_info(
569-
op.hoid, false, attrs, size);
570-
} else {
571-
derr << __func__ << ": can't stat-or-getattr on " << op.hoid <<
572-
dendl;
573-
}
574-
if (!op.hinfo) {
575-
derr << __func__ << ": " << op.hoid << " has inconsistent hinfo"
576-
<< dendl;
577-
ceph_assert(recovery_ops.count(op.hoid));
578-
eversion_t v = recovery_ops[op.hoid].v;
579-
recovery_ops.erase(op.hoid);
580-
// TODO: not in crimson yet
581-
get_parent()->on_failed_pull({get_parent()->whoami_shard()},
582-
op.hoid, v);
583-
return;
584-
}
585-
encode(*(op.hinfo), op.xattrs[ECUtil::get_hinfo_key()]);
586-
}
587550
}
588551

589552
read_request_t read_request(std::move(want),
@@ -675,6 +638,12 @@ void ECBackend::RecoveryBackend::continue_recovery_op(
675638
dout(10) << __func__ << ": push all attrs (not nonprimary)" << dendl;
676639
pop.attrset = op.xattrs;
677640
}
641+
642+
// Following an upgrade, or turning of overwrites, we can take this
643+
// opportunity to clean up hinfo.
644+
if (pop.attrset.contains(ECUtil::get_hinfo_key())) {
645+
pop.attrset.erase(ECUtil::get_hinfo_key());
646+
}
678647
}
679648
pop.recovery_info = op.recovery_info;
680649
pop.before_progress = op.recovery_progress;
@@ -1100,53 +1069,6 @@ void ECBackend::handle_sub_read(
11001069
<< bl.length() << dendl;
11011070
reply->buffers_read[hoid].push_back(make_pair(offset, bl));
11021071
}
1103-
1104-
if (!sinfo.supports_ec_overwrites()) {
1105-
// This shows that we still need deep scrub because large enough files
1106-
// are read in sections, so the digest check here won't be done here.
1107-
// Do NOT check osd_read_eio_on_bad_digest here. We need to report
1108-
// the state of our chunk in case other chunks could substitute.
1109-
ECUtil::HashInfoRef hinfo;
1110-
map<string, bufferlist, less<>> attrs;
1111-
struct stat st;
1112-
int r = object_stat(hoid, &st);
1113-
if (r >= 0) {
1114-
dout(10) << __func__ << ": found on disk, size " << st.st_size << dendl;
1115-
r = switcher->objects_get_attrs_with_hinfo(hoid, &attrs);
1116-
}
1117-
if (r >= 0) {
1118-
hinfo = unstable_hashinfo_registry.get_hash_info(
1119-
hoid, false, attrs, st.st_size);
1120-
} else {
1121-
derr << __func__ << ": access (attrs) on " << hoid << " failed: "
1122-
<< cpp_strerror(r) << dendl;
1123-
}
1124-
if (!hinfo) {
1125-
r = -EIO;
1126-
get_parent()->clog_error() << "Corruption detected: object "
1127-
<< hoid << " is missing hash_info";
1128-
dout(5) << __func__ << ": No hinfo for " << hoid << dendl;
1129-
goto error;
1130-
}
1131-
ceph_assert(hinfo->has_chunk_hash());
1132-
if ((bl.length() == hinfo->get_total_chunk_size()) &&
1133-
(offset == 0)) {
1134-
dout(20) << __func__ << ": Checking hash of " << hoid << dendl;
1135-
bufferhash h(-1);
1136-
h << bl;
1137-
if (h.digest() != hinfo->get_chunk_hash(shard)) {
1138-
get_parent()->clog_error() << "Bad hash for " << hoid <<
1139-
" digest 0x"
1140-
<< hex << h.digest() << " expected 0x" << hinfo->
1141-
get_chunk_hash(shard) << dec;
1142-
dout(5) << __func__ << ": Bad hash for " << hoid << " digest 0x"
1143-
<< hex << h.digest() << " expected 0x"
1144-
<< hinfo->get_chunk_hash(shard) << dec << dendl;
1145-
r = -EIO;
1146-
goto error;
1147-
}
1148-
}
1149-
}
11501072
}
11511073
continue;
11521074
error:
@@ -1229,9 +1151,8 @@ void ECBackend::handle_sub_read_reply(
12291151
++i) {
12301152
if (ECInject::test_read_error0(
12311153
ghobject_t(i->first, ghobject_t::NO_GEN, op.from.shard))) {
1232-
dout(0) << __func__ << " Error inject - EIO error for shard " << op.from
1233-
.shard
1234-
<< dendl;
1154+
dout(0) << __func__ << " Error inject - EIO error for shard "
1155+
<< op.from.shard << dendl;
12351156
op.buffers_read.erase(i->first);
12361157
op.attrs_read.erase(i->first);
12371158
op.errors[i->first] = -EIO;
@@ -1513,14 +1434,6 @@ std::tuple<
15131434
return {0, real_attrs, st.st_size};
15141435
}
15151436

1516-
ECUtil::HashInfoRef ECBackend::get_hinfo_from_disk(hobject_t oid) {
1517-
auto [r, attrs, size] = get_attrs_n_size_from_disk(oid);
1518-
ceph_assert(r >= 0 || r == -ENOENT);
1519-
ECUtil::HashInfoRef hinfo = unstable_hashinfo_registry.get_hash_info(
1520-
oid, true, attrs, size);
1521-
return hinfo;
1522-
}
1523-
15241437
std::optional<object_info_t> ECBackend::get_object_info_from_obc(
15251438
ObjectContextRef &obc) {
15261439
std::optional<object_info_t> ret;
@@ -1578,21 +1491,12 @@ void ECBackend::submit_transaction(
15781491
op->t->safe_create_traverse(
15791492
[&](std::pair<const hobject_t, PGTransaction::ObjectOperation> &i) {
15801493
const auto &[oid, inner_op] = i;
1581-
ECUtil::HashInfoRef shinfo;
15821494
auto &obc = obc_map.at(oid);
15831495
object_info_t oi = obc->obs.oi;
15841496
std::optional<object_info_t> soi;
1585-
ECUtil::HashInfoRef hinfo;
1586-
1587-
if (!sinfo.supports_ec_overwrites()) {
1588-
hinfo = get_hinfo_from_disk(oid);
1589-
}
15901497

15911498
hobject_t source;
15921499
if (inner_op.has_source(&source)) {
1593-
if (!sinfo.supports_ec_overwrites()) {
1594-
shinfo = get_hinfo_from_disk(source);
1595-
}
15961500
if (!inner_op.is_rename()) {
15971501
soi = get_object_info_from_obc(obc_map.at(source));
15981502
}
@@ -1619,8 +1523,7 @@ void ECBackend::submit_transaction(
16191523
ECTransaction::WritePlanObj plan(oid, inner_op, sinfo, readable_shards,
16201524
writable_shards,
16211525
object_in_cache, old_object_size,
1622-
oi, soi, std::move(hinfo),
1623-
std::move(shinfo),
1526+
oi, soi,
16241527
rmw_pipeline.ec_pdw_write_mode);
16251528

16261529
if (plan.to_read) plans.want_read = true;
@@ -1857,53 +1760,7 @@ int ECBackend::be_deep_scrub(
18571760
return -EINPROGRESS;
18581761
}
18591762

1860-
if (!sinfo.get_is_hinfo_required()) {
1861-
o.digest = 0;
1862-
o.digest_present = true;
1863-
o.omap_digest = -1;
1864-
o.omap_digest_present = true;
1865-
return 0;
1866-
}
1867-
1868-
ECUtil::HashInfoRef hinfo = unstable_hashinfo_registry.get_hash_info(
1869-
poid, false, o.attrs, o.size);
1870-
if (!hinfo) {
1871-
dout(0) << "_scan_list " << poid << " could not retrieve hash info" << dendl;
1872-
o.read_error = true;
1873-
o.digest_present = false;
1874-
return 0;
1875-
}
1876-
if (!hinfo->has_chunk_hash()) {
1877-
dout(0) << "_scan_list " << poid << " got invalid hash info" << dendl;
1878-
o.ec_size_mismatch = true;
1879-
return 0;
1880-
}
1881-
if (hinfo->get_total_chunk_size() != (unsigned)pos.data_pos) {
1882-
dout(0) << "_scan_list " << poid << " got incorrect size on read 0x"
1883-
<< std::hex << pos
1884-
<< " expected 0x" << hinfo->get_total_chunk_size() << std::dec
1885-
<< dendl;
1886-
o.ec_size_mismatch = true;
1887-
return 0;
1888-
}
1889-
1890-
if (hinfo->get_chunk_hash(get_parent()->whoami_shard().shard) !=
1891-
pos.data_hash.digest()) {
1892-
dout(0) << "_scan_list " << poid << " got incorrect hash on read 0x"
1893-
<< std::hex << pos.data_hash.digest() << " != expected 0x"
1894-
<< hinfo->get_chunk_hash(get_parent()->whoami_shard().shard)
1895-
<< std::dec << dendl;
1896-
o.ec_hash_mismatch = true;
1897-
return 0;
1898-
}
1899-
1900-
/* We checked above that we match our own stored hash. We cannot
1901-
* send a hash of the actual object, so instead we simply send
1902-
* our locally stored hash of shard 0 on the assumption that if
1903-
* we match our chunk hash and our recollection of the hash for
1904-
* chunk 0 matches that of our peers, there is likely no corruption.
1905-
*/
1906-
o.digest = hinfo->get_chunk_hash(shard_id_t(0));
1763+
o.digest = 0;
19071764
o.digest_present = true;
19081765
o.omap_digest = -1;
19091766
o.omap_digest_present = true;

src/osd/ECBackend.h

Lines changed: 1 addition & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -217,7 +217,6 @@ class ECBackend : public ECCommon {
217217
ceph::ErasureCodeInterfaceRef ec_impl;
218218
const ECUtil::stripe_info_t &sinfo;
219219
ReadPipeline &read_pipeline;
220-
UnstableHashInfoRegistry &unstable_hashinfo_registry;
221220
// TODO: lay an interface down here
222221
ECListener *parent;
223222
ECBackend *ecbackend;
@@ -244,7 +243,6 @@ class ECBackend : public ECCommon {
244243
ceph::ErasureCodeInterfaceRef ec_impl,
245244
const ECUtil::stripe_info_t &sinfo,
246245
ReadPipeline &read_pipeline,
247-
UnstableHashInfoRegistry &unstable_hashinfo_registry,
248246
ECListener *parent,
249247
ECBackend *ecbackend);
250248

@@ -278,7 +276,6 @@ class ECBackend : public ECCommon {
278276
// must be filled if state == WRITING
279277
std::optional<ECUtil::shard_extent_map_t> returned_data;
280278
std::map<std::string, ceph::buffer::list, std::less<>> xattrs;
281-
ECUtil::HashInfoRef hinfo;
282279
ObjectContextRef obc;
283280
std::set<pg_shard_t> waiting_on_pushes;
284281

@@ -352,12 +349,10 @@ class ECBackend : public ECCommon {
352349
ceph::ErasureCodeInterfaceRef ec_impl,
353350
const ECUtil::stripe_info_t &sinfo,
354351
ReadPipeline &read_pipeline,
355-
UnstableHashInfoRegistry &unstable_hashinfo_registry,
356352
PGBackend::Listener *parent,
357353
ECBackend *ecbackend)
358354
: RecoveryBackend(cct, coll, std::move(ec_impl), sinfo, read_pipeline,
359-
unstable_hashinfo_registry, parent->get_eclistener(),
360-
ecbackend),
355+
parent->get_eclistener(), ecbackend),
361356
parent(parent) {}
362357

363358
void commit_txn_send_replies(
@@ -481,17 +476,12 @@ class ECBackend : public ECCommon {
481476

482477
const ECUtil::stripe_info_t sinfo;
483478

484-
ECCommon::UnstableHashInfoRegistry unstable_hashinfo_registry;
485-
486-
487479
std::tuple<
488480
int,
489481
std::map<std::string, ceph::bufferlist, std::less<>>,
490482
size_t
491483
> get_attrs_n_size_from_disk(const hobject_t &hoid);
492484

493-
ECUtil::HashInfoRef get_hinfo_from_disk(hobject_t oid);
494-
495485
std::optional<object_info_t> get_object_info_from_obc(
496486
ObjectContextRef &obc_map
497487
);

0 commit comments

Comments
 (0)