Skip to content

Commit 98d8c60

Browse files
Merge pull request ceph#62170 from connorfawcett/wip-online-consistency-checker-0703
erasure-code: Add a new utility which checks the consistency of objects within an EC pool
2 parents 0f1fed1 + 14a11de commit 98d8c60

26 files changed

+1404
-16
lines changed

src/common/json/OSDStructures.cc

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,14 +61,16 @@ void OSDPoolGetRequest::decode_json(JSONObj* obj) {
6161

6262
// Writes both reply fields to the formatter via encode_json, using the field
// names as JSON keys: "erasure_code_profile" first, then
// "allow_ec_optimizations".
void OSDPoolGetReply::dump(Formatter* f) const {
6363
encode_json("erasure_code_profile", erasure_code_profile, f);
64+
encode_json("allow_ec_optimizations", allow_ec_optimizations, f);
6465
}
6566

6667
// Reads the "erasure_code_profile" and "allow_ec_optimizations" keys from obj
// into the matching members; the counterpart of dump().
void OSDPoolGetReply::decode_json(JSONObj* obj) {
6768
JSONDecoder::decode_json("erasure_code_profile", erasure_code_profile, obj);
69+
JSONDecoder::decode_json("allow_ec_optimizations", allow_ec_optimizations, obj);
6870
}
6971

7072
// Writes the request as three keys: "prefix", "name" and "format".
// In this diff the "prefix" value changes from "osd pool get" (removed line)
// to "osd erasure-code-profile get" (added line).
void OSDECProfileGetRequest::dump(Formatter* f) const {
71-
encode_json("prefix", "osd pool get", f);
73+
encode_json("prefix", "osd erasure-code-profile get", f);
7274
encode_json("name", name, f);
7375
encode_json("format", format, f);
7476
}
@@ -159,4 +161,26 @@ void OSDSetRequest::dump(Formatter* f) const {
159161
// Reads the "key" and "yes_i_really_mean_it" keys from obj into the matching
// members.
void OSDSetRequest::decode_json(JSONObj* obj) {
160162
JSONDecoder::decode_json("key", key, obj);
161163
JSONDecoder::decode_json("yes_i_really_mean_it", yes_i_really_mean_it, obj);
164+
}
165+
166+
// Writes the request with the fixed "prefix" value "injectparityread",
// followed by the "pool" and "objname" members.
void InjectECParityRead::dump(Formatter* f) const {
167+
encode_json("prefix", "injectparityread", f);
168+
encode_json("pool", pool, f);
169+
encode_json("objname", objname, f);
170+
}
171+
172+
// Reads "pool" and "objname" from obj. The "prefix" key written by dump() is
// not read back.
void InjectECParityRead::decode_json(JSONObj* obj) {
173+
JSONDecoder::decode_json("pool", pool, obj);
174+
JSONDecoder::decode_json("objname", objname, obj);
175+
}
176+
177+
// Writes the request with the fixed "prefix" value "injectclearparityread",
// followed by the "pool" and "objname" members.
void InjectECClearParityRead::dump(Formatter* f) const {
178+
encode_json("prefix", "injectclearparityread", f);
179+
encode_json("pool", pool, f);
180+
encode_json("objname", objname, f);
181+
}
182+
183+
// Reads "pool" and "objname" from obj. The "prefix" key written by dump() is
// not read back.
void InjectECClearParityRead::decode_json(JSONObj* obj) {
184+
JSONDecoder::decode_json("pool", pool, obj);
185+
JSONDecoder::decode_json("objname", objname, obj);
162186
}

src/common/json/OSDStructures.h

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ struct OSDPoolGetRequest {
5050

5151
struct OSDPoolGetReply {
5252
std::string erasure_code_profile;
53-
53+
bool allow_ec_optimizations;
5454
void dump(Formatter* f) const;
5555
void decode_json(JSONObj* obj);
5656
};
@@ -190,6 +190,20 @@ struct InjectECClearErrorRequest {
190190
JSONDecoder::decode_json("type", type, obj);
191191
}
192192
};
193+
// JSON-serialisable request identifying an object by pool and object name;
// dump() emits it with the command prefix "injectparityread".
struct InjectECParityRead {
194+
std::string pool;
195+
std::string objname;
196+
197+
void dump(Formatter* f) const;
198+
void decode_json(JSONObj* obj);
199+
};
200+
// JSON-serialisable request identifying an object by pool and object name;
// dump() emits it with the command prefix "injectclearparityread".
struct InjectECClearParityRead {
201+
std::string pool;
202+
std::string objname;
203+
204+
void dump(Formatter* f) const;
205+
void decode_json(JSONObj* obj);
206+
};
193207
} // namespace osd
194208
} // namespace messaging
195209
} // namespace ceph

src/erasure-code/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ add_subdirectory(jerasure)
2121
add_subdirectory(lrc)
2222
add_subdirectory(shec)
2323
add_subdirectory(clay)
24+
add_subdirectory(consistency)
2425

2526
if(HAVE_NASM_X64_AVX2 OR HAVE_ARMV8_SIMD)
2627
set(WITH_EC_ISA_PLUGIN TRUE CACHE BOOL "")
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
# Static library holding the consistency checker sources together with the
# OSD ECUtil/ECUtilL sources.
add_library(ec_consistency STATIC
2+
ECReader.cc
3+
ECEncoder.cc
4+
ECEncoderSwitch.cc
5+
Pool.cc
6+
ConsistencyChecker.cc
7+
RadosCommands.cc
8+
${PROJECT_SOURCE_DIR}/src/osd/ECUtilL.cc
9+
${PROJECT_SOURCE_DIR}/src/osd/ECUtil.cc
10+
)
11+
12+
# Libraries the checker library links against.
target_link_libraries(ec_consistency
13+
librados
14+
global
15+
json_structures
16+
)
17+
18+
# Command-line tool. NOTE(review): ECUtilL.cc and ECUtil.cc are listed here as
# well as in ec_consistency, which this target also links - confirm the
# duplication is intentional.
add_executable(ceph_ec_consistency_checker
19+
${CMAKE_CURRENT_SOURCE_DIR}/ceph_ec_consistency_checker.cc
20+
${PROJECT_SOURCE_DIR}/src/osd/ECUtilL.cc
21+
${PROJECT_SOURCE_DIR}/src/osd/ECUtil.cc)
22+
target_link_libraries(ceph_ec_consistency_checker
23+
librados global ec_consistency)
# Install the tool into the configured binary directory.
24+
install(TARGETS
25+
ceph_ec_consistency_checker
26+
DESTINATION ${CMAKE_INSTALL_BINDIR})
Lines changed: 169 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,169 @@
1+
#include "ConsistencyChecker.h"
2+
3+
#include "RadosCommands.h"
4+
#include "Pool.h"
5+
#include "ECReader.h"
6+
#include "ECEncoder.h"
7+
#include "ECEncoderSwitch.h"
8+
9+
using ConsistencyChecker = ceph::consistency::ConsistencyChecker;
10+
11+
using Read = ceph::consistency::Read;
12+
using ReadResult = ceph::consistency::ReadResult;
13+
using bufferlist = ceph::bufferlist;
14+
15+
/**
 * Construct a checker bound to one pool.
 *
 * Members are initialised in order: the reader and command helper first, then
 * the pool description (EC profile and optimisation flag fetched through the
 * command helper), and finally the encoder, which is built from the pool's
 * EC profile.
 *
 * @param rados RADOS handle used by the reader and the command helper
 * @param asio io_context handed to the reader
 * @param pool_name string Name of the pool whose objects will be checked
 * @param stripe_unit int Stripe unit passed to the encoder
 */
ConsistencyChecker::ConsistencyChecker(librados::Rados &rados,
                                       boost::asio::io_context& asio,
                                       const std::string& pool_name,
                                       int stripe_unit)
  : rados(rados),
    asio(asio),
    reader(rados, asio, pool_name),
    commands(rados),
    pool(pool_name,
         commands.get_ec_profile_for_pool(pool_name),
         commands.get_pool_allow_ec_optimizations(pool_name)),
    encoder(pool.get_ec_profile(),
            stripe_unit,
            commands.get_pool_allow_ec_optimizations(pool_name))
{
}
30+
31+
/**
32+
* Perform an end-to-end read and consistency check on a single object.
33+
* Current implementation only supports reading the entire object, so length and
34+
* offset should normally be 0.
35+
*
36+
* @param oid string Name of the pool to perform inject on
37+
* @param block_size int Block size for the data being read
38+
* @param offset int Which offset to read from
39+
* @param length int How much data of each shard to read
40+
* @return bool true if consistent, otherwise false
41+
*/
42+
bool ConsistencyChecker::single_read_and_check_consistency(const std::string& oid,
43+
int block_size,
44+
int offset,
45+
int length)
46+
{
47+
clear_results();
48+
std::string error_message = "";
49+
bool success = true;
50+
51+
auto read = Read(oid, block_size, offset, length);
52+
queue_ec_read(read);
53+
54+
auto read_results = reader.get_results();
55+
int result_count = read_results->size();
56+
if (result_count != 1) {
57+
error_message = "Incorrect number of RADOS read results returned, count: "
58+
+ std::to_string(result_count);
59+
success = false;
60+
}
61+
62+
ReadResult read_result = (*read_results)[0];
63+
boost::system::error_code ec = read_result.get_ec();
64+
if (success && ec != boost::system::errc::success) {
65+
error_message = "RADOS Read failed, error message: " + ec.message();
66+
success = false;
67+
}
68+
69+
if (success && read_result.get_data().length() == 0) {
70+
error_message = "Empty object returned from RADOS read.";
71+
success = false;
72+
}
73+
74+
if (success && !check_object_consistency(oid, read_result.get_data())) {
75+
error_message = "Generated parity did not match read in parity shards.";
76+
success = false;
77+
}
78+
79+
results.push_back({oid, error_message, success});
80+
commands.inject_clear_parity_read_on_primary_osd(pool.get_pool_name(),
81+
oid);
82+
return success;
83+
}
84+
85+
/**
86+
* Queue up an EC read with the parity read inject set
87+
*
88+
* @param read Object containing information about the read
89+
*/
90+
void ConsistencyChecker::queue_ec_read(Read read)
91+
{
92+
commands.inject_parity_read_on_primary_osd(pool.get_pool_name(),
93+
read.get_oid());
94+
reader.do_read(read);
95+
}
96+
97+
/**
98+
* Generate parities from the data and compare to the parity shards
99+
*
100+
* @param oid string The object ID of the object being checked
101+
* @param inbl bufferlist The entire contents of the object, including parities
102+
* @param stripe_unit int The chunk size for the object
103+
*/
104+
bool ConsistencyChecker::check_object_consistency(const std::string& oid,
105+
const bufferlist& inbl)
106+
{
107+
bool is_optimized = pool.has_optimizations_enabled();
108+
std::pair<bufferlist, bufferlist> data_and_parity;
109+
data_and_parity = split_data_and_parity(oid, inbl, encoder.get_k(),
110+
encoder.get_m(), is_optimized);
111+
112+
std::optional<bufferlist> outbl;
113+
outbl = encoder.do_encode(data_and_parity.first);
114+
115+
if (!outbl.has_value()) {
116+
return false;
117+
}
118+
119+
return buffers_match(outbl.value(), data_and_parity.second);
120+
}
121+
122+
void ConsistencyChecker::print_results(std::ostream& out)
123+
{
124+
out << "Results:" << std::endl;
125+
for (const auto &r : results) {
126+
std::string result_str = (r.get_result()) ? "Passed" : "Failed";
127+
std::string error_str = r.get_error_message();
128+
out << "Object ID " << r.get_oid() << ": " << result_str << std::endl;
129+
if (!error_str.empty()) {
130+
out << "Error Message: " << error_str << std::endl;
131+
}
132+
}
133+
134+
int count = results.size();
135+
std::string obj_str = (count == 1) ? "object checked." : "objects checked.";
136+
out << "Total: " << count << " " << obj_str << std::endl;
137+
}
138+
139+
std::pair<bufferlist, bufferlist>
140+
ConsistencyChecker::split_data_and_parity(const std::string& oid,
141+
const bufferlist& read,
142+
int k, int m,
143+
bool is_optimized)
144+
{
145+
uint64_t data_size, parity_size;
146+
147+
// Optimized EC parity read should return the exact object size + parity shards
148+
// Legacy EC parity read will return the entire padded data shards + parity shards
149+
data_size = is_optimized ? reader.get_object_size(oid) : (read.length() / (k + m)) * k;
150+
parity_size = read.length() - data_size;
151+
152+
bufferlist data, parity;
153+
auto it = read.begin();
154+
it.copy(data_size, data);
155+
it.copy(parity_size, parity);
156+
return std::pair<bufferlist, bufferlist>(data, parity);
157+
}
158+
159+
bool ConsistencyChecker::buffers_match(const bufferlist& b1,
160+
const bufferlist& b2)
161+
{
162+
return (b1.contents_equal(b2));
163+
}
164+
165+
void ConsistencyChecker::clear_results()
166+
{
167+
reader.clear_results();
168+
results.clear();
169+
}
Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
#pragma once
2+
3+
#include <ostream>
#include <string>
#include <utility>
#include <vector>

#include <boost/asio/io_context.hpp>
#include <boost/program_options.hpp>
#include "librados/librados_asio.h"
#include "global/global_init.h"
#include "global/global_context.h"

#include "Pool.h"
#include "ECReader.h"
#include "RadosCommands.h"
#include "ECEncoder.h"
#include "ECEncoderSwitch.h"
14+
15+
#define dout_context g_ceph_context
16+
17+
namespace ceph {
18+
namespace consistency {
19+
/**
 * Outcome of a consistency check on one object: the object ID, an error
 * message (empty when there is nothing to report) and a pass/fail flag.
 */
class ConsistencyCheckResult {
  private:
    std::string oid;
    std::string error_message;
    bool result;

  public:
    std::string get_oid() const { return oid; }
    std::string get_error_message() const { return error_message; }
    bool get_result() const { return result; }

    /**
     * @param oid string Object ID the result refers to
     * @param error_message string Description of the failure, or empty
     * @param result bool true if the object passed the check
     */
    ConsistencyCheckResult(std::string oid,
                           std::string error_message,
                           bool result) :
      // The strings are taken by value as sink parameters, so move them into
      // the members rather than copying a second time.
      oid(std::move(oid)),
      error_message(std::move(error_message)),
      result(result) {}
};
36+
37+
class ConsistencyChecker {
38+
private:
39+
librados::Rados& rados;
40+
boost::asio::io_context& asio;
41+
ceph::consistency::ECReader reader;
42+
ceph::consistency::RadosCommands commands;
43+
ceph::consistency::Pool pool;
44+
ceph::consistency::ECEncoderSwitch encoder;
45+
std::vector<ConsistencyCheckResult> results;
46+
bool buffers_match(const bufferlist& b1, const bufferlist& b2);
47+
std::pair<bufferlist, bufferlist> split_data_and_parity(const std::string& oid,
48+
const bufferlist& read,
49+
int k, int m,
50+
bool is_optimized);
51+
52+
public:
53+
ConsistencyChecker(librados::Rados& rados,
54+
boost::asio::io_context& asio,
55+
const std::string& pool_name,
56+
int stripe_unit);
57+
void queue_ec_read(Read read);
58+
bool check_object_consistency(const std::string& oid,
59+
const bufferlist& inbl);
60+
void print_results(std::ostream& out);
61+
void clear_results();
62+
bool single_read_and_check_consistency(const std::string& oid,
63+
int block_size,
64+
int offset,
65+
int length);
66+
};
67+
}
68+
}

0 commit comments

Comments
 (0)