Skip to content

Commit da6f3c4

Browse files
committed
Revert "crimson/os/seastore: wait ool writes in DeviceSubmission phase"
This reverts commit c9e423f. The commit starts to submit OOL writes before submitting the journal write, true, but it cannot guarantee that OOL writes finish before the journal write. Thus it is possible that during SeaStore restart, a journal record appears valid but its dependent OOL records are partial written, which leads to corruption. Signed-off-by: Yingxin Cheng <[email protected]>
1 parent 964adda commit da6f3c4

File tree

5 files changed

+40
-68
lines changed

5 files changed

+40
-68
lines changed

src/crimson/os/seastore/extent_placement_manager.cc

Lines changed: 36 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,8 @@ SegmentedOolWriter::SegmentedOolWriter(
2828
{
2929
}
3030

31-
void SegmentedOolWriter::write_record(
31+
SegmentedOolWriter::alloc_write_ertr::future<>
32+
SegmentedOolWriter::write_record(
3233
Transaction& t,
3334
record_t&& record,
3435
std::list<LogicalCachedExtentRef>&& extents,
@@ -62,20 +63,15 @@ void SegmentedOolWriter::write_record(
6263
extent_addr = extent_addr.as_seg_paddr().add_offset(
6364
extent->get_length());
6465
}
65-
// t might be destructed inside write_future
66-
auto write_future = seastar::with_gate(write_guard,
67-
[this, FNAME, tid=t.get_trans_id(),
68-
record_base=ret.record_base_regardless_md,
69-
submit_fut=std::move(ret.future)]() mutable {
70-
return std::move(submit_fut
71-
).safe_then([this, FNAME, tid, record_base](record_locator_t ret) {
72-
TRACE("trans.{} {} finish {}=={}",
73-
tid, segment_allocator.get_name(), ret, record_base);
74-
// ool won't write metadata, so the paddrs must be equal
75-
assert(ret.record_block_base == record_base.offset);
76-
});
66+
return std::move(ret.future
67+
).safe_then([this, FNAME, &t,
68+
record_base=ret.record_base_regardless_md
69+
](record_locator_t ret) {
70+
TRACET("{} finish {}=={}",
71+
t, segment_allocator.get_name(), ret, record_base);
72+
// ool won't write metadata, so the paddrs must be equal
73+
assert(ret.record_block_base == record_base.offset);
7774
});
78-
t.get_handle().add_write_future(std::move(write_future));
7975
}
8076

8177
SegmentedOolWriter::alloc_write_iertr::future<>
@@ -112,15 +108,19 @@ SegmentedOolWriter::do_write(
112108
DEBUGT("{} extents={} submit {} extents and roll, unavailable ...",
113109
t, segment_allocator.get_name(),
114110
extents.size(), num_extents);
111+
auto fut_write = alloc_write_ertr::now();
115112
if (num_extents > 0) {
116113
assert(record_submitter.check_action(record.size) !=
117114
action_t::ROLL);
118-
write_record(
115+
fut_write = write_record(
119116
t, std::move(record), std::move(pending_extents),
120117
true/* with_atomic_roll_segment */);
121118
}
122119
return trans_intr::make_interruptible(
123-
record_submitter.roll_segment()
120+
record_submitter.roll_segment(
121+
).safe_then([fut_write=std::move(fut_write)]() mutable {
122+
return std::move(fut_write);
123+
})
124124
).si_then([this, &t, &extents] {
125125
return do_write(t, extents);
126126
});
@@ -151,12 +151,15 @@ SegmentedOolWriter::do_write(
151151
DEBUGT("{} extents={} submit {} extents ...",
152152
t, segment_allocator.get_name(),
153153
extents.size(), pending_extents.size());
154-
write_record(t, std::move(record), std::move(pending_extents));
155-
if (!extents.empty()) {
156-
return do_write(t, extents);
157-
} else {
158-
return alloc_write_iertr::now();
159-
}
154+
return trans_intr::make_interruptible(
155+
write_record(t, std::move(record), std::move(pending_extents))
156+
).si_then([this, &t, &extents] {
157+
if (!extents.empty()) {
158+
return do_write(t, extents);
159+
} else {
160+
return alloc_write_iertr::now();
161+
}
162+
});
160163
}
161164
// SUBMIT_NOT_FULL: evaluate the next extent
162165
}
@@ -166,8 +169,8 @@ SegmentedOolWriter::do_write(
166169
t, segment_allocator.get_name(),
167170
num_extents);
168171
assert(num_extents > 0);
169-
write_record(t, std::move(record), std::move(pending_extents));
170-
return alloc_write_iertr::now();
172+
return trans_intr::make_interruptible(
173+
write_record(t, std::move(record), std::move(pending_extents)));
171174
}
172175

173176
SegmentedOolWriter::alloc_write_iertr::future<>
@@ -991,11 +994,13 @@ RandomBlockOolWriter::alloc_write_ool_extents(
991994
if (extents.empty()) {
992995
return alloc_write_iertr::now();
993996
}
994-
do_write(t, extents);
995-
return alloc_write_iertr::now();
997+
return seastar::with_gate(write_guard, [this, &t, &extents] {
998+
return do_write(t, extents);
999+
});
9961000
}
9971001

998-
void RandomBlockOolWriter::do_write(
1002+
RandomBlockOolWriter::alloc_write_iertr::future<>
1003+
RandomBlockOolWriter::do_write(
9991004
Transaction& t,
10001005
std::list<CachedExtentRef>& extents)
10011006
{
@@ -1048,10 +1053,8 @@ void RandomBlockOolWriter::do_write(
10481053
}
10491054
}
10501055

1051-
// t might be destructed inside write_future
1052-
auto write_future = seastar::with_gate(write_guard,
1053-
[writes=std::move(writes)]() mutable {
1054-
return seastar::do_with(std::move(writes),
1056+
return trans_intr::make_interruptible(
1057+
seastar::do_with(std::move(writes),
10551058
[](auto& writes) {
10561059
return crimson::do_for_each(writes,
10571060
[](auto& info) {
@@ -1062,9 +1065,8 @@ void RandomBlockOolWriter::do_write(
10621065
"Invalid error when writing record"}
10631066
);
10641067
});
1065-
});
1066-
});
1067-
t.get_handle().add_write_future(std::move(write_future));
1068+
})
1069+
);
10681070
}
10691071

10701072
}

src/crimson/os/seastore/extent_placement_manager.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,7 @@ class SegmentedOolWriter : public ExtentOolWriter {
115115
Transaction& t,
116116
std::list<CachedExtentRef> &extent);
117117

118-
void write_record(
118+
alloc_write_ertr::future<> write_record(
119119
Transaction& t,
120120
record_t&& record,
121121
std::list<LogicalCachedExtentRef> &&extents,
@@ -195,7 +195,7 @@ class RandomBlockOolWriter : public ExtentOolWriter {
195195
ceph::bufferptr bp;
196196
RandomBlockManager* rbm;
197197
};
198-
void do_write(
198+
alloc_write_iertr::future<> do_write(
199199
Transaction& t,
200200
std::list<CachedExtentRef> &extent);
201201

src/crimson/os/seastore/journal/circular_bounded_journal.cc

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -97,9 +97,7 @@ CircularBoundedJournal::do_submit_record(
9797
auto submit_ret = record_submitter.submit(std::move(record));
9898
// submit_ret.record_base_regardless_md is wrong for journaling
9999
return handle.enter(write_pipeline->device_submission
100-
).then([&handle] {
101-
return handle.take_write_future();
102-
}).safe_then([submit_fut=std::move(submit_ret.future)]() mutable {
100+
).then([submit_fut=std::move(submit_ret.future)]() mutable {
103101
return std::move(submit_fut);
104102
}).safe_then([FNAME, this, &handle](record_locator_t result) {
105103
return handle.enter(write_pipeline->finalize

src/crimson/os/seastore/journal/segmented_journal.cc

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -396,9 +396,7 @@ SegmentedJournal::do_submit_record(
396396
auto submit_ret = record_submitter.submit(std::move(record));
397397
// submit_ret.record_base_regardless_md is wrong for journaling
398398
return handle.enter(write_pipeline->device_submission
399-
).then([&handle] {
400-
return handle.take_write_future();
401-
}).safe_then([submit_fut=std::move(submit_ret.future)]() mutable {
399+
).then([submit_fut=std::move(submit_ret.future)]() mutable {
402400
return std::move(submit_fut);
403401
}).safe_then([FNAME, this, &handle](record_locator_t result) {
404402
return handle.enter(write_pipeline->finalize

src/crimson/os/seastore/ordering_handle.h

Lines changed: 0 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -122,11 +122,6 @@ struct OrderingHandle {
122122
std::unique_ptr<OperationProxy> op;
123123
seastar::shared_mutex *collection_ordering_lock = nullptr;
124124

125-
using write_ertr = crimson::errorator<
126-
crimson::ct_error::input_output_error>;
127-
// the pending writes that should complete at DeviceSubmission phase
128-
write_ertr::future<> write_future = write_ertr::now();
129-
130125
// in the future we might add further constructors / template to type
131126
// erasure while extracting the location of tracking events.
132127
OrderingHandle(std::unique_ptr<OperationProxy> op) : op(std::move(op)) {}
@@ -149,31 +144,13 @@ struct OrderingHandle {
149144
}
150145
}
151146

152-
void add_write_future(write_ertr::future<>&& fut) {
153-
auto appended = std::move(write_future
154-
).safe_then([fut=std::move(fut)]() mutable {
155-
return std::move(fut);
156-
});
157-
write_future = std::move(appended);
158-
}
159-
160-
write_ertr::future<> take_write_future() {
161-
auto ret = std::move(write_future);
162-
write_future = write_ertr::now();
163-
return ret;
164-
}
165-
166147
template <typename T>
167148
seastar::future<> enter(T &t) {
168149
return op->enter(t);
169150
}
170151

171152
void exit() {
172153
op->exit();
173-
174-
auto ignore_writes = std::move(write_future);
175-
std::ignore = ignore_writes;
176-
write_future = write_ertr::now();
177154
}
178155

179156
seastar::future<> complete() {
@@ -182,9 +159,6 @@ struct OrderingHandle {
182159

183160
~OrderingHandle() {
184161
maybe_release_collection_lock();
185-
186-
assert(write_future.available());
187-
assert(!write_future.failed());
188162
}
189163
};
190164

0 commit comments

Comments
 (0)