Skip to content

Commit 15722b8

Browse files
authored
Merge pull request ceph#57782 from myoungwon/wip-nvme-pi
crimson/os/seastore: add checksum offload to RBM Reviewed-by: Yingxin Cheng <[email protected]>
2 parents 86385b1 + 07d0663 commit 15722b8

22 files changed

+390
-100
lines changed

src/common/options/crimson.yaml.in

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -154,3 +154,10 @@ options:
154154
level: dev
155155
desc: overwrite the existing data block based on delta if the overwrite size is equal to or less than the value, otherwise do overwrite based on remapping, set to 0 to enforce the remap-based overwrite.
156156
default: 0
157+
- name: seastore_disable_end_to_end_data_protection
158+
type: bool
159+
level: dev
160+
desc: When false, upon mkfs, try to discover whether the nvme device supports
161+
internal checksum feature without using server CPU then enable if available,
162+
set to true to disable unconditionally.
163+
default: true

src/crimson/os/seastore/cache.cc

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1562,7 +1562,13 @@ void Cache::complete_commit(
15621562
is_inline = true;
15631563
i->set_paddr(final_block_start.add_relative(i->get_paddr()));
15641564
}
1565-
assert(i->get_last_committed_crc() == i->calc_crc32c());
1565+
#ifndef NDEBUG
1566+
if (i->get_paddr().is_root() || epm.get_checksum_needed(i->get_paddr())) {
1567+
assert(i->get_last_committed_crc() == i->calc_crc32c());
1568+
} else {
1569+
assert(i->get_last_committed_crc() == CRC_NULL);
1570+
}
1571+
#endif
15661572
i->pending_for_transaction = TRANS_ID_NULL;
15671573
i->on_initial_write();
15681574

src/crimson/os/seastore/cache.h

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1651,7 +1651,7 @@ class Cache {
16511651
extent->get_length(),
16521652
extent->get_bptr()
16531653
).safe_then(
1654-
[extent=std::move(extent)]() mutable {
1654+
[extent=std::move(extent), this]() mutable {
16551655
LOG_PREFIX(Cache::read_extent);
16561656
if (likely(extent->state == CachedExtent::extent_state_t::CLEAN_PENDING)) {
16571657
extent->state = CachedExtent::extent_state_t::CLEAN;
@@ -1662,7 +1662,11 @@ class Cache {
16621662
if (extent->is_valid()) {
16631663
// crc will be checked against LBA leaf entry for logical extents,
16641664
// or check against in-extent crc for physical extents.
1665-
extent->last_committed_crc = extent->calc_crc32c();
1665+
if (epm.get_checksum_needed(extent->get_paddr())) {
1666+
extent->last_committed_crc = extent->calc_crc32c();
1667+
} else {
1668+
extent->last_committed_crc = CRC_NULL;
1669+
}
16661670
extent->on_clean_read();
16671671
}
16681672
extent->complete_io();

src/crimson/os/seastore/device.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,10 @@ class Device {
137137

138138
virtual secondary_device_set_t& get_secondary_devices() = 0;
139139

140+
virtual bool is_end_to_end_data_protection() const {
141+
return false;
142+
}
143+
140144
using close_ertr = crimson::errorator<
141145
crimson::ct_error::input_output_error>;
142146
virtual close_ertr::future<> close() = 0;

src/crimson/os/seastore/extent_placement_manager.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -551,6 +551,15 @@ class ExtentPlacementManager {
551551
return background_process.run_until_halt();
552552
}
553553

554+
bool get_checksum_needed(paddr_t addr) {
555+
// checksum offloading only for blocks physically stored in the device
556+
if (addr.is_fake()) {
557+
return true;
558+
}
559+
assert(addr.is_absolute());
560+
return !devices_by_id[addr.get_device_id()]->is_end_to_end_data_protection();
561+
}
562+
554563
private:
555564
rewrite_gen_t adjust_generation(
556565
data_category_t category,

src/crimson/os/seastore/journal.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,8 @@ class Journal {
107107
virtual ~Journal() {}
108108

109109
virtual backend_type_t get_type() = 0;
110+
111+
virtual bool is_checksum_needed() = 0;
110112
};
111113
using JournalRef = std::unique_ptr<Journal>;
112114

src/crimson/os/seastore/journal/circular_bounded_journal.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -186,6 +186,10 @@ class CircularBoundedJournal : public Journal, RecordScanner {
186186
return get_journal_end();
187187
}
188188

189+
bool is_checksum_needed() final {
190+
return cjs.is_checksum_needed();
191+
}
192+
189193
// Test interfaces
190194

191195
CircularJournalSpace& get_cjs() {

src/crimson/os/seastore/journal/circular_journal_space.cc

Lines changed: 27 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -152,16 +152,18 @@ ceph::bufferlist CircularJournalSpace::encode_header()
152152
{
153153
bufferlist bl;
154154
encode(header, bl);
155-
auto header_crc_filler = bl.append_hole(sizeof(checksum_t));
156-
auto bliter = bl.cbegin();
157-
auto header_crc = bliter.crc32c(
158-
ceph::encoded_sizeof_bounded<cbj_header_t>(),
159-
-1);
160-
ceph_le32 header_crc_le;
161-
header_crc_le = header_crc;
162-
header_crc_filler.copy_in(
163-
sizeof(checksum_t),
164-
reinterpret_cast<const char *>(&header_crc_le));
155+
if (!device->is_end_to_end_data_protection()) {
156+
auto header_crc_filler = bl.append_hole(sizeof(checksum_t));
157+
auto bliter = bl.cbegin();
158+
auto header_crc = bliter.crc32c(
159+
ceph::encoded_sizeof_bounded<cbj_header_t>(),
160+
-1);
161+
ceph_le32 header_crc_le;
162+
header_crc_le = header_crc;
163+
header_crc_filler.copy_in(
164+
sizeof(checksum_t),
165+
reinterpret_cast<const char *>(&header_crc_le));
166+
}
165167
return bl;
166168
}
167169

@@ -193,7 +195,7 @@ CircularJournalSpace::read_header()
193195
device->get_block_size()));
194196
DEBUG("reading {}", device->get_shard_journal_start());
195197
return device->read(device->get_shard_journal_start(), bptr
196-
).safe_then([bptr, FNAME]() mutable
198+
).safe_then([bptr, FNAME, this]() mutable
197199
-> read_header_ret {
198200
bufferlist bl;
199201
bl.append(bptr);
@@ -205,18 +207,20 @@ CircularJournalSpace::read_header()
205207
ERROR("unable to read header block");
206208
return crimson::ct_error::enoent::make();
207209
}
208-
auto bliter = bl.cbegin();
209-
auto test_crc = bliter.crc32c(
210-
ceph::encoded_sizeof_bounded<cbj_header_t>(),
211-
-1);
212-
ceph_le32 recorded_crc_le;
213-
decode(recorded_crc_le, bliter);
214-
uint32_t recorded_crc = recorded_crc_le;
215-
if (test_crc != recorded_crc) {
216-
ERROR("error, header crc mismatch.");
217-
return read_header_ret(
218-
read_header_ertr::ready_future_marker{},
219-
std::nullopt);
210+
if (!device->is_end_to_end_data_protection()) {
211+
auto bliter = bl.cbegin();
212+
auto test_crc = bliter.crc32c(
213+
ceph::encoded_sizeof_bounded<cbj_header_t>(),
214+
-1);
215+
ceph_le32 recorded_crc_le;
216+
decode(recorded_crc_le, bliter);
217+
uint32_t recorded_crc = recorded_crc_le;
218+
if (test_crc != recorded_crc) {
219+
ERROR("error, header crc mismatch.");
220+
return read_header_ret(
221+
read_header_ertr::ready_future_marker{},
222+
std::nullopt);
223+
}
220224
}
221225
return read_header_ret(
222226
read_header_ertr::ready_future_marker{},

src/crimson/os/seastore/journal/circular_journal_space.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -242,6 +242,10 @@ class CircularJournalSpace : public JournalAllocator {
242242
return header;
243243
}
244244

245+
bool is_checksum_needed() {
246+
return !device->is_end_to_end_data_protection();
247+
}
248+
245249
private:
246250
std::string print_name;
247251
cbj_header_t header;

src/crimson/os/seastore/journal/segmented_journal.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,11 @@ class SegmentedJournal : public Journal {
6363
return seastar::now();
6464
}
6565

66+
bool is_checksum_needed() final {
67+
// segmented journal always requires checksum
68+
return true;
69+
}
70+
6671
private:
6772
submit_record_ret do_submit_record(
6873
record_t &&record,

0 commit comments

Comments
 (0)