Skip to content

Commit 5e56258

Browse files
authored
Merge pull request ceph#43754 from cyx1231st/wip-seastore-fix-journal-committed-to
crimson/os/seastore: fix ordered updates to JournalSegmentManager::committed_to Reviewed-by: Samuel Just <[email protected]> Reviewed-by: Xuehan Xu <[email protected]>
2 parents df9988d + 0ad74ec commit 5e56258

File tree

8 files changed

+69
-71
lines changed

8 files changed

+69
-71
lines changed

src/crimson/os/seastore/extent_placement_manager.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@ class ool_record_t {
6969
extent_offset += extent.get_bptr().length();
7070
}
7171
assert(extent_offset == (segment_off_t)(base + rsize.mdlength + rsize.dlength));
72-
return encode_record(rsize, std::move(record), block_size, base, nonce);
72+
return encode_record(rsize, std::move(record), block_size, journal_seq_t(), nonce);
7373
}
7474
void add_extent(LogicalCachedExtentRef& extent) {
7575
extents.emplace_back(extent);
@@ -87,7 +87,7 @@ class ool_record_t {
8787
void set_base(segment_off_t b) {
8888
base = b;
8989
}
90-
segment_off_t get_base() {
90+
segment_off_t get_base() const {
9191
return base;
9292
}
9393
void clear() {

src/crimson/os/seastore/extent_reader.cc

Lines changed: 21 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ ExtentReader::scan_extents_ret ExtentReader::scan_extents(
6262
{
6363
auto ret = std::make_unique<scan_extents_ret_bare>();
6464
auto* extents = ret.get();
65-
return read_segment_header(cursor.get_offset().segment
65+
return read_segment_header(cursor.get_segment_id()
6666
).handle_error(
6767
scan_extents_ertr::pass_further{},
6868
crimson::ct_error::assert_all{
@@ -114,9 +114,9 @@ ExtentReader::scan_valid_records_ret ExtentReader::scan_valid_records(
114114
found_record_handler_t &handler)
115115
{
116116
auto& segment_manager =
117-
*segment_managers[cursor.offset.segment.device_id()];
118-
if (cursor.offset.offset == 0) {
119-
cursor.offset.offset = segment_manager.get_block_size();
117+
*segment_managers[cursor.get_segment_id().device_id()];
118+
if (cursor.get_segment_offset() == 0) {
119+
cursor.increment(segment_manager.get_block_size());
120120
}
121121
auto retref = std::make_unique<size_t>(0);
122122
auto &budget_used = *retref;
@@ -125,30 +125,33 @@ ExtentReader::scan_valid_records_ret ExtentReader::scan_valid_records(
125125
-> scan_valid_records_ertr::future<seastar::stop_iteration> {
126126
return [=, &handler, &cursor, &budget_used] {
127127
if (!cursor.last_valid_header_found) {
128-
return read_validate_record_metadata(cursor.offset, nonce
128+
return read_validate_record_metadata(cursor.seq.offset, nonce
129129
).safe_then([=, &cursor](auto md) {
130130
logger().debug(
131131
"ExtentReader::scan_valid_records: read complete {}",
132-
cursor.offset);
132+
cursor.seq);
133133
if (!md) {
134134
logger().debug(
135135
"ExtentReader::scan_valid_records: found invalid header at {}, presumably at end",
136-
cursor.offset);
136+
cursor.seq);
137137
cursor.last_valid_header_found = true;
138138
return scan_valid_records_ertr::now();
139139
} else {
140+
auto new_committed_to = md->first.committed_to;
140141
logger().debug(
141-
"ExtentReader::scan_valid_records: valid record read at {}",
142-
cursor.offset);
143-
cursor.last_committed = paddr_t{
144-
cursor.offset.segment,
145-
md->first.committed_to};
142+
"ExtentReader::scan_valid_records: valid record read at {}, now committed at {}",
143+
cursor.seq,
144+
new_committed_to);
145+
ceph_assert(cursor.last_committed == journal_seq_t() ||
146+
cursor.last_committed <= new_committed_to);
147+
cursor.last_committed = new_committed_to;
146148
cursor.pending_records.emplace_back(
147-
cursor.offset,
149+
cursor.seq.offset,
148150
md->first,
149151
md->second);
150-
cursor.offset.offset +=
151-
md->first.dlength + md->first.mdlength;
152+
cursor.increment(md->first.dlength + md->first.mdlength);
153+
ceph_assert(new_committed_to == journal_seq_t() ||
154+
new_committed_to < cursor.seq);
152155
return scan_valid_records_ertr::now();
153156
}
154157
}).safe_then([=, &cursor, &budget_used, &handler] {
@@ -166,7 +169,9 @@ ExtentReader::scan_valid_records_ret ExtentReader::scan_valid_records(
166169
seastar::stop_iteration>(seastar::stop_iteration::yes);
167170
}
168171
auto &next = cursor.pending_records.front();
169-
if (next.offset > cursor.last_committed) {
172+
journal_seq_t next_seq = {cursor.seq.segment_seq, next.offset};
173+
if (cursor.last_committed == journal_seq_t() ||
174+
next_seq > cursor.last_committed) {
170175
return scan_valid_records_ertr::make_ready_future<
171176
seastar::stop_iteration>(seastar::stop_iteration::yes);
172177
}

src/crimson/os/seastore/journal.cc

Lines changed: 7 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -170,7 +170,7 @@ Journal::replay_segment(
170170
{
171171
logger().debug("Journal::replay_segment: starting at {}", seq);
172172
return seastar::do_with(
173-
scan_valid_records_cursor(seq.offset),
173+
scan_valid_records_cursor(seq),
174174
ExtentReader::found_record_handler_t(
175175
[=, &handler](paddr_t base,
176176
const record_header_t &header,
@@ -378,13 +378,9 @@ void Journal::JournalSegmentManager::mark_committed(
378378
logger().debug(
379379
"JournalSegmentManager::mark_committed: committed_to {} => {}",
380380
committed_to, new_committed_to);
381-
assert(new_committed_to.segment_seq <=
382-
get_segment_seq());
383-
if (new_committed_to.segment_seq ==
384-
get_segment_seq()) {
385-
assert(committed_to.offset.offset < new_committed_to.offset.offset);
386-
committed_to = new_committed_to;
387-
}
381+
assert(committed_to == journal_seq_t() ||
382+
committed_to <= new_committed_to);
383+
committed_to = new_committed_to;
388384
}
389385

390386
Journal::JournalSegmentManager::initialize_segment_ertr::future<>
@@ -401,7 +397,7 @@ Journal::JournalSegmentManager::initialize_segment(Segment& segment)
401397
auto header = segment_header_t{
402398
seq,
403399
segment.get_segment_id(),
404-
segment_provider->get_journal_tail_target(),
400+
new_tail,
405401
current_segment_nonce,
406402
false};
407403
logger().debug(
@@ -421,14 +417,9 @@ Journal::JournalSegmentManager::initialize_segment(Segment& segment)
421417
bl.append(bp);
422418

423419
written_to = 0;
424-
// FIXME: improve committed_to to point to another segment
425-
committed_to = get_current_write_seq();
426420
return write(bl
427421
).safe_then([this, new_tail, write_size=bl.length()
428422
](journal_seq_t write_start_seq) {
429-
auto committed_to = write_start_seq;
430-
committed_to.offset.offset += write_size;
431-
mark_committed(committed_to);
432423
segment_provider->update_journal_tail_committed(new_tail);
433424
});
434425
}
@@ -475,7 +466,7 @@ Journal::RecordBatch::add_pending(
475466

476467
ceph::bufferlist Journal::RecordBatch::encode_records(
477468
size_t block_size,
478-
segment_off_t committed_to,
469+
const journal_seq_t& committed_to,
479470
segment_nonce_t segment_nonce)
480471
{
481472
logger().debug(
@@ -532,7 +523,7 @@ ceph::bufferlist Journal::RecordBatch::submit_pending_fast(
532523
record_t&& record,
533524
const record_size_t& rsize,
534525
size_t block_size,
535-
segment_off_t committed_to,
526+
const journal_seq_t& committed_to,
536527
segment_nonce_t segment_nonce)
537528
{
538529
logger().debug(

src/crimson/os/seastore/journal.h

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -140,10 +140,8 @@ class Journal {
140140
return current_segment_nonce;
141141
}
142142

143-
segment_off_t get_committed_to() const {
144-
assert(committed_to.segment_seq ==
145-
get_segment_seq());
146-
return committed_to.offset.offset;
143+
journal_seq_t get_committed_to() const {
144+
return committed_to;
147145
}
148146

149147
segment_seq_t get_segment_seq() const {
@@ -287,7 +285,7 @@ class Journal {
287285
// Encode the batched records for write.
288286
ceph::bufferlist encode_records(
289287
size_t block_size,
290-
segment_off_t committed_to,
288+
const journal_seq_t& committed_to,
291289
segment_nonce_t segment_nonce);
292290

293291
// Set the write result and reset for reuse
@@ -304,7 +302,7 @@ class Journal {
304302
record_t&&,
305303
const record_size_t&,
306304
size_t block_size,
307-
segment_off_t committed_to,
305+
const journal_seq_t& committed_to,
308306
segment_nonce_t segment_nonce);
309307

310308
private:

src/crimson/os/seastore/seastore_types.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -150,7 +150,7 @@ ceph::bufferlist encode_record(
150150
record_size_t rsize,
151151
record_t &&record,
152152
size_t block_size,
153-
segment_off_t committed_to,
153+
const journal_seq_t& committed_to,
154154
segment_nonce_t current_segment_nonce)
155155
{
156156
bufferlist data_bl;

src/crimson/os/seastore/seastore_types.h

Lines changed: 19 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1143,11 +1143,11 @@ struct record_header_t {
11431143
// Fixed portion
11441144
extent_len_t mdlength; // block aligned, length of metadata
11451145
extent_len_t dlength; // block aligned, length of data
1146-
uint32_t deltas; // number of deltas
1147-
uint32_t extents; // number of extents
1146+
uint32_t deltas; // number of deltas
1147+
uint32_t extents; // number of extents
11481148
segment_nonce_t segment_nonce;// nonce of containing segment
1149-
segment_off_t committed_to; // records in this segment prior to committed_to
1150-
// have been fully written
1149+
journal_seq_t committed_to; // records prior to committed_to have been
1150+
// fully written, maybe in another segment.
11511151
checksum_t data_crc; // crc of data payload
11521152

11531153

@@ -1188,14 +1188,14 @@ ceph::bufferlist encode_record(
11881188
record_size_t rsize,
11891189
record_t &&record,
11901190
size_t block_size,
1191-
segment_off_t committed_to,
1191+
const journal_seq_t& committed_to,
11921192
segment_nonce_t current_segment_nonce = 0);
11931193

11941194
/// scan segment for end incrementally
11951195
struct scan_valid_records_cursor {
11961196
bool last_valid_header_found = false;
1197-
paddr_t offset;
1198-
paddr_t last_committed;
1197+
journal_seq_t seq;
1198+
journal_seq_t last_committed;
11991199

12001200
struct found_record_t {
12011201
paddr_t offset;
@@ -1214,13 +1214,21 @@ struct scan_valid_records_cursor {
12141214
return last_valid_header_found && pending_records.empty();
12151215
}
12161216

1217-
paddr_t get_offset() const {
1218-
return offset;
1217+
segment_id_t get_segment_id() const {
1218+
return seq.offset.segment;
1219+
}
1220+
1221+
segment_off_t get_segment_offset() const {
1222+
return seq.offset.offset;
1223+
}
1224+
1225+
void increment(segment_off_t off) {
1226+
seq.offset.offset += off;
12191227
}
12201228

12211229
scan_valid_records_cursor(
1222-
paddr_t offset)
1223-
: offset(offset) {}
1230+
journal_seq_t seq)
1231+
: seq(seq) {}
12241232
};
12251233

12261234
}

src/crimson/os/seastore/segment_cleaner.cc

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -321,20 +321,18 @@ SegmentCleaner::gc_trim_journal_ret SegmentCleaner::gc_trim_journal()
321321
SegmentCleaner::gc_reclaim_space_ret SegmentCleaner::gc_reclaim_space()
322322
{
323323
if (!scan_cursor) {
324-
paddr_t next = P_ADDR_NULL;
325-
next.segment = get_next_gc_target();
326-
if (next == P_ADDR_NULL) {
324+
journal_seq_t next = get_next_gc_target();
325+
if (next == journal_seq_t()) {
327326
logger().debug(
328327
"SegmentCleaner::do_gc: no segments to gc");
329328
return seastar::now();
330329
}
331-
next.offset = 0;
332330
scan_cursor =
333331
std::make_unique<ExtentReader::scan_extents_cursor>(
334332
next);
335333
logger().debug(
336334
"SegmentCleaner::do_gc: starting gc on segment {}",
337-
scan_cursor->get_offset().segment);
335+
scan_cursor->seq);
338336
} else {
339337
ceph_assert(!scan_cursor->is_complete());
340338
}
@@ -384,7 +382,7 @@ SegmentCleaner::gc_reclaim_space_ret SegmentCleaner::gc_reclaim_space()
384382
});
385383
}).si_then([this, &t] {
386384
if (scan_cursor->is_complete()) {
387-
t.mark_segment_to_release(scan_cursor->get_offset().segment);
385+
t.mark_segment_to_release(scan_cursor->get_segment_id());
388386
}
389387
return ecb->submit_transaction_direct(t);
390388
});

src/crimson/os/seastore/segment_cleaner.h

Lines changed: 11 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -683,10 +683,6 @@ class SegmentCleaner : public SegmentProvider {
683683

684684
void update_journal_tail_target(journal_seq_t target);
685685

686-
void init_journal_tail(journal_seq_t tail) {
687-
journal_tail_target = journal_tail_committed = tail;
688-
}
689-
690686
void init_mkfs(journal_seq_t head) {
691687
journal_tail_target = head;
692688
journal_tail_committed = head;
@@ -773,30 +769,32 @@ class SegmentCleaner : public SegmentProvider {
773769
assert(ret >= 0);
774770
}
775771

776-
segment_id_t get_next_gc_target() const {
777-
segment_id_t ret = NULL_SEG_ID;
772+
journal_seq_t get_next_gc_target() const {
773+
segment_id_t id = NULL_SEG_ID;
778774
segment_seq_t seq = NULL_SEG_SEQ;
779775
int64_t least_live_bytes = std::numeric_limits<int64_t>::max();
780776
for (auto it = segments.begin();
781777
it != segments.end();
782778
++it) {
783-
auto id = it->first;
779+
auto _id = it->first;
784780
const auto& segment_info = it->second;
785781
if (segment_info.is_closed() &&
786782
!segment_info.is_in_journal(journal_tail_committed) &&
787-
space_tracker->get_usage(id) < least_live_bytes) {
788-
ret = id;
783+
space_tracker->get_usage(_id) < least_live_bytes) {
784+
id = _id;
789785
seq = segment_info.journal_segment_seq;
790786
least_live_bytes = space_tracker->get_usage(id);
791787
}
792788
}
793-
if (ret != NULL_SEG_ID) {
789+
if (id != NULL_SEG_ID) {
794790
crimson::get_logger(ceph_subsys_seastore).debug(
795791
"SegmentCleaner::get_next_gc_target: segment {} seq {}",
796-
ret,
792+
id,
797793
seq);
794+
return journal_seq_t{seq, {id, 0}};
795+
} else {
796+
return journal_seq_t();
798797
}
799-
return ret;
800798
}
801799

802800
SpaceTrackerIRef get_empty_space_tracker() const {
@@ -987,7 +985,7 @@ class SegmentCleaner : public SegmentProvider {
987985
if (!scan_cursor)
988986
return 0;
989987

990-
return scan_cursor->get_offset().offset;
988+
return scan_cursor->get_segment_offset();
991989
}
992990

993991
/// Returns free space available for writes

0 commit comments

Comments
 (0)