@@ -98,7 +98,8 @@ using extent_to_write_list_t = std::list<extent_to_write_t>;
9898// Encapsulates extents to be written out using do_remappings.
9999struct extent_to_remap_t {
// Kind of operation an extent_to_remap_t entry describes. As dispatched in
// do_remappings: REMAP1 goes through remap_pin<ObjectDataBlock, 1>, REMAP2
// through remap_pin<ObjectDataBlock, 2>, and OVERWRITE fetches a mutable
// extent by laddr and calls overwrite() on it.
100100 enum class type_t {
101- REMAP,
101+ REMAP1,
102+ REMAP2,
102103 OVERWRITE
103104 };
104105 type_t type;
@@ -114,54 +115,75 @@ struct extent_to_remap_t {
114115 extent_to_remap_t (const extent_to_remap_t &) = delete ;
115116 extent_to_remap_t (extent_to_remap_t &&) = default ;
116117
// Returns true when this entry's type is REMAP1.
117- bool is_remap () const {
118- return type == type_t ::REMAP ;
118+ bool is_remap1 () const {
119+ return type == type_t ::REMAP1 ;
119120 }
120121
// Returns true when this entry's type is REMAP2.
// The assert runs before the type comparison, so with assertions enabled the
// caller must hold a non-null pin, a non-zero new_offset, and a new range that
// does not end exactly at the pin's length, whatever the entry's type is.
// NOTE(review): create_overwrite builds entries with a null pin; calling this
// predicate on such an entry would dereference it inside the assert. Confirm
// callers always test is_overwrite() first.
121- bool is_overwrite () const {
122+ bool is_remap2 () const {
122123 assert ((new_offset != 0 ) && (pin->get_length () != new_offset + new_len));
124+ return type == type_t ::REMAP2;
125+ }
126+
// Returns true when this entry's type is OVERWRITE. Performs no other checks.
127+ bool is_overwrite () const {
123128 return type == type_t ::OVERWRITE;
124129 }
125130
126131 using remap_entry = TransactionManager::remap_entry;
// Builds the single remap_entry for a REMAP1 entry from (new_offset, new_len).
// Asserts that the entry is REMAP1.
127132 remap_entry create_remap_entry () {
128- assert (is_remap ());
133+ assert (is_remap1 ());
129134 return remap_entry (
130135 new_offset,
131136 new_len);
132137 }
133138
// Builds the left-hand remap_entry of a REMAP2 entry: remap_entry(0, new_offset),
// presumably (offset, len) -- i.e. the part of the pin before the new range;
// confirm against remap_entry's definition. Asserts that the entry is REMAP2.
134139 remap_entry create_left_remap_entry () {
135- assert (is_overwrite ());
140+ assert (is_remap2 ());
136141 return remap_entry (
137142 0 ,
138143 new_offset);
139144 }
140145
// Builds the right-hand remap_entry of a REMAP2 entry: it starts at
// new_offset + new_len and its second argument is whatever remains of the
// pin's length after that point. Asserts that the entry is REMAP2.
141146 remap_entry create_right_remap_entry () {
142- assert (is_overwrite ());
147+ assert (is_remap2 ());
143148 return remap_entry (
144149 new_offset + new_len,
145150 pin->get_length () - new_offset - new_len);
146151 }
147152
// Factory for a REMAP1 entry. Takes ownership of `pin` (moved into the entry)
// and records new_offset / new_len unchanged.
148- static extent_to_remap_t create_remap (
153+ static extent_to_remap_t create_remap1 (
149154 LBAMappingRef &&pin, extent_len_t new_offset, extent_len_t new_len) {
150- return extent_to_remap_t (type_t ::REMAP ,
155+ return extent_to_remap_t (type_t ::REMAP1 ,
151156 std::move (pin), new_offset, new_len);
152157 }
153158
// Factory for a REMAP2 entry. Takes ownership of `pin` (moved into the entry)
// and records new_offset / new_len unchanged.
154- static extent_to_remap_t create_overwrite (
159+ static extent_to_remap_t create_remap2 (
155160 LBAMappingRef &&pin, extent_len_t new_offset, extent_len_t new_len) {
156- return extent_to_remap_t (type_t ::OVERWRITE ,
161+ return extent_to_remap_t (type_t ::REMAP2 ,
157162 std::move (pin), new_offset, new_len);
158163 }
159164
// Factory for an OVERWRITE entry. The entry keeps no pin (nullptr is passed);
// instead p's key and length are stored as laddr_start / length, and the
// payload `b` is stored in `bl`. `p` and `b` are taken by value, and `b` is
// passed on as an lvalue (copied, not moved).
165+ static extent_to_remap_t create_overwrite (
166+ extent_len_t new_offset, extent_len_t new_len, LBAMappingRef p,
167+ bufferlist b) {
168+ return extent_to_remap_t (type_t ::OVERWRITE,
169+ nullptr , new_offset, new_len, p->get_key (), p->get_length (), b);
170+ }
171+
172+ uint64_t laddr_start;
173+ extent_len_t length;
174+ std::optional<bufferlist> bl;
175+
160176private:
// Private constructor used by the pin-based factories: stores the type, takes
// ownership of the pin, and records new_offset / new_len.
// NOTE(review): laddr_start and length are not initialized on this path and
// have no default member initializers; bl is left as an empty optional.
161177 extent_to_remap_t (type_t type,
162178 LBAMappingRef &&pin, extent_len_t new_offset, extent_len_t new_len)
163179 : type(type),
164180 pin (std::move(pin)), new_offset(new_offset), new_len(new_len) {}
// Private constructor that additionally records the original mapping's start
// address and length (laddr_start / length) and an optional payload (bl).
// `b` is copied into bl.
181+ extent_to_remap_t (type_t type,
182+ LBAMappingRef &&pin, extent_len_t new_offset, extent_len_t new_len,
183+ uint64_t ori_laddr, extent_len_t ori_len, std::optional<bufferlist> b)
184+ : type(type),
185+ pin(std::move(pin)), new_offset(new_offset), new_len(new_len),
186+ laddr_start(ori_laddr), length(ori_len), bl(b) {}
165187};
166188using extent_to_remap_list_t = std::list<extent_to_remap_t >;
167189
@@ -222,7 +244,8 @@ struct overwrite_ops_t {
222244// prepare to_remap, to_retire, to_insert list
223245overwrite_ops_t prepare_ops_list (
224246 lba_pin_list_t &pins_to_remove,
225- extent_to_write_list_t &to_write) {
247+ extent_to_write_list_t &to_write,
248+ size_t delta_based_overwrite_max_extent_size) {
226249 assert (pins_to_remove.size () != 0 );
227250 overwrite_ops_t ops;
228251 ops.to_remove .swap (pins_to_remove);
@@ -241,7 +264,7 @@ overwrite_ops_t prepare_ops_list(
241264 assert (to_write.size () > 2 );
242265 assert (front.addr == front.pin ->get_key ());
243266 assert (back.addr > back.pin ->get_key ());
244- ops.to_remap .push_back (extent_to_remap_t::create_overwrite (
267+ ops.to_remap .push_back (extent_to_remap_t::create_remap2 (
245268 std::move (front.pin ),
246269 front.len ,
247270 back.addr - front.addr - front.len ));
@@ -252,7 +275,7 @@ overwrite_ops_t prepare_ops_list(
252275 visitted++;
253276 assert (to_write.size () > 1 );
254277 assert (front.addr == front.pin ->get_key ());
255- ops.to_remap .push_back (extent_to_remap_t::create_remap (
278+ ops.to_remap .push_back (extent_to_remap_t::create_remap1 (
256279 std::move (front.pin ),
257280 0 ,
258281 front.len ));
@@ -263,28 +286,81 @@ overwrite_ops_t prepare_ops_list(
263286 assert (to_write.size () > 1 );
264287 assert (back.addr + back.len ==
265288 back.pin ->get_key () + back.pin ->get_length ());
266- ops.to_remap .push_back (extent_to_remap_t::create_remap (
289+ ops.to_remap .push_back (extent_to_remap_t::create_remap1 (
267290 std::move (back.pin ),
268291 back.addr - back.pin ->get_key (),
269292 back.len ));
270293 ops.to_remove .pop_back ();
271294 }
272295 }
273296
274- // prepare to_insert
297+ interval_set<uint64_t > pre_alloc_addr_removed, pre_alloc_addr_remapped;
298+ if (delta_based_overwrite_max_extent_size) {
299+ for (auto &r : ops.to_remove ) {
300+ if (r->is_stable () && !r->is_zero_reserved ()) {
301+ pre_alloc_addr_removed.insert (r->get_key (), r->get_length ());
302+
303+ }
304+ }
305+ for (auto &r : ops.to_remap ) {
306+ if (r.pin && r.pin ->is_stable () && !r.pin ->is_zero_reserved ()) {
307+ pre_alloc_addr_remapped.insert (r.pin ->get_key (), r.pin ->get_length ());
308+ }
309+ }
310+ }
311+
312+ // prepare to insert
313+ extent_to_remap_list_t to_remap;
276315 for (auto &region : to_write) {
276315 if (region.is_data ()) {
277316 visitted++;
278317 assert (region.to_write .has_value ());
279- ops.to_insert .push_back (extent_to_insert_t::create_data (
280- region.addr , region.len , region.to_write ));
318+ int erased_num = 0 ;
319+ if (pre_alloc_addr_removed.contains (region.addr , region.len ) &&
320+ region.len <= delta_based_overwrite_max_extent_size) {
321+ erased_num = std::erase_if (
322+ ops.to_remove ,
323+ [&region, &to_remap](auto &r) {
324+ interval_set<uint64_t > range;
325+ range.insert (r->get_key (), r->get_length ());
326+ if (range.contains (region.addr , region.len ) && !r->is_clone ()) {
327+ to_remap.push_back (extent_to_remap_t::create_overwrite (
328+ 0 , region.len , std::move (r), *region.to_write ));
329+ return true ;
330+ }
331+ return false ;
332+ });
333+ // if the size of the region is wider than the range from the entry in to_remove,
334+ // we create a separate extent in the original way.
335+ } else if (pre_alloc_addr_remapped.contains (region.addr , region.len ) &&
336+ region.len <= delta_based_overwrite_max_extent_size) {
337+ erased_num = std::erase_if (
338+ ops.to_remap ,
339+ [&region, &to_remap](auto &r) {
340+ interval_set<uint64_t > range;
341+ range.insert (r.pin ->get_key (), r.pin ->get_length ());
342+ if (range.contains (region.addr , region.len ) && !r.pin ->is_clone ()) {
343+ to_remap.push_back (extent_to_remap_t::create_overwrite (
344+ region.addr - range.begin ().get_start (), region.len ,
345+ std::move (r.pin ), *region.to_write ));
346+ return true ;
347+ }
348+ return false ;
349+ });
350+ assert (erased_num > 0 );
351+ }
352+ if (erased_num == 0 ) {
353+ ops.to_insert .push_back (extent_to_insert_t::create_data (
354+ region.addr , region.len , region.to_write ));
355+ }
281356 } else if (region.is_zero ()) {
282357 visitted++;
283358 assert (!(region.to_write .has_value ()));
284359 ops.to_insert .push_back (extent_to_insert_t::create_zero (
285360 region.addr , region.len ));
286361 }
287362 }
363+ ops.to_remap .splice (ops.to_remap .end (), to_remap);
288364
289365 logger ().debug (
290366 " to_remap list size: {}"
@@ -334,6 +410,22 @@ void splice_extent_to_write(
334410 }
335411}
336412
// Serializes the block's `delta` member into a fresh bufferlist via encode()
// and returns it. Does not modify `delta` here.
413+ ceph::bufferlist ObjectDataBlock::get_delta () {
414+ ceph::bufferlist bl;
415+ encode (delta, bl);
416+ return bl;
417+ }
418+
// Replays an encoded delta onto this block's buffer.
// `bl` is decoded into a local of the same type as the `delta` member (the
// counterpart of get_delta's encode); the member itself is not touched.
// For each decoded entry, d.len bytes are copied from the start of d.bl into
// the block buffer at byte offset d.offset.
// NOTE(review): no check is visible here that d.offset + d.len stays within
// the buffer or that d.bl holds at least d.len bytes -- confirm the encoder
// guarantees this.
419+ void ObjectDataBlock::apply_delta (const ceph::bufferlist &bl) {
420+ auto biter = bl.begin ();
421+ decltype (delta) deltas;
422+ decode (deltas, biter);
423+ for (auto &&d : deltas) {
424+ auto iter = d.bl .cbegin ();
425+ iter.copy (d.len , get_bptr ().c_str () + d.offset );
426+ }
427+ }
428+
337429// / Creates remap extents in to_remap
338430ObjectDataHandler::write_ret do_remappings (
339431 context_t ctx,
@@ -342,7 +434,7 @@ ObjectDataHandler::write_ret do_remappings(
342434 return trans_intr::do_for_each (
343435 to_remap,
344436 [ctx](auto &region) {
345- if (region.is_remap ()) {
437+ if (region.is_remap1 ()) {
346438 return ctx.tm .remap_pin <ObjectDataBlock, 1 >(
347439 ctx.t ,
348440 std::move (region.pin ),
@@ -355,6 +447,22 @@ ObjectDataHandler::write_ret do_remappings(
355447 return ObjectDataHandler::write_iertr::now ();
356448 });
357449 } else if (region.is_overwrite ()) {
450+ return ctx.tm .get_mutable_extent_by_laddr <ObjectDataBlock>(
451+ ctx.t ,
452+ region.laddr_start ,
453+ region.length
454+ ).handle_error_interruptible (
455+ TransactionManager::base_iertr::pass_further{},
456+ crimson::ct_error::assert_all{
457+ " ObjectDataHandler::do_remapping hit invalid error"
458+ }
459+ ).si_then ([&region](auto extent) {
460+ extent_len_t off = region.new_offset ;
461+ assert (region.bl ->length () == region.new_len );
462+ extent->overwrite (off, *region.bl );
463+ return ObjectDataHandler::write_iertr::now ();
464+ });
465+ } else if (region.is_remap2 ()) {
358466 return ctx.tm .remap_pin <ObjectDataBlock, 2 >(
359467 ctx.t ,
360468 std::move (region.pin ),
@@ -960,7 +1068,7 @@ ObjectDataHandler::clear_ret ObjectDataHandler::trim_data_reservation(
9601068 return seastar::do_with (
9611069 lba_pin_list_t (),
9621070 extent_to_write_list_t (),
963- [ctx, size, &object_data](auto &pins, auto &to_write) {
1071+ [ctx, size, &object_data, this ](auto &pins, auto &to_write) {
9641072 LOG_PREFIX (ObjectDataHandler::trim_data_reservation);
9651073 DEBUGT (" object_data: {}~{}" ,
9661074 ctx.t ,
@@ -1038,9 +1146,10 @@ ObjectDataHandler::clear_ret ObjectDataHandler::trim_data_reservation(
10381146 });
10391147 }
10401148 }
1041- }).si_then ([ctx, size, &to_write, &object_data, &pins] {
1149+ }).si_then ([ctx, size, &to_write, &object_data, &pins, this ] {
10421150 return seastar::do_with (
1043- prepare_ops_list (pins, to_write),
1151+ prepare_ops_list (pins, to_write,
1152+ delta_based_overwrite_max_extent_size),
10441153 [ctx, size, &object_data](auto &ops) {
10451154 return do_remappings (ctx, ops.to_remap
10461155 ).si_then ([ctx, &ops] {
@@ -1162,7 +1271,7 @@ ObjectDataHandler::write_ret ObjectDataHandler::overwrite(
11621271 return seastar::do_with (
11631272 std::move (_pins),
11641273 extent_to_write_list_t (),
1165- [ctx, len, offset, overwrite_plan, bl=std::move (bl)]
1274+ [ctx, len, offset, overwrite_plan, bl=std::move (bl), this ]
11661275 (auto &pins, auto &to_write) mutable
11671276 {
11681277 LOG_PREFIX (ObjectDataHandler::overwrite);
@@ -1178,7 +1287,7 @@ ObjectDataHandler::write_ret ObjectDataHandler::overwrite(
11781287 pins.front (),
11791288 overwrite_plan
11801289 ).si_then ([ctx, len, offset, overwrite_plan, bl=std::move (bl),
1181- &to_write, &pins](auto p) mutable {
1290+ &to_write, &pins, this ](auto p) mutable {
11821291 auto &[left_extent, headptr] = p;
11831292 if (left_extent) {
11841293 ceph_assert (left_extent->addr == overwrite_plan.pin_begin );
@@ -1195,7 +1304,7 @@ ObjectDataHandler::write_ret ObjectDataHandler::overwrite(
11951304 pin_begin=overwrite_plan.pin_begin ,
11961305 pin_end=overwrite_plan.pin_end ,
11971306 bl=std::move (bl), headptr=std::move (headptr),
1198- &to_write, &pins](auto p) mutable {
1307+ &to_write, &pins, this ](auto p) mutable {
11991308 auto &[right_extent, tailptr] = p;
12001309 if (bl.has_value ()) {
12011310 auto write_offset = offset;
@@ -1232,7 +1341,8 @@ ObjectDataHandler::write_ret ObjectDataHandler::overwrite(
12321341 assert (pin_end == to_write.back ().get_end_addr ());
12331342
12341343 return seastar::do_with (
1235- prepare_ops_list (pins, to_write),
1344+ prepare_ops_list (pins, to_write,
1345+ delta_based_overwrite_max_extent_size),
12361346 [ctx](auto &ops) {
12371347 return do_remappings (ctx, ops.to_remap
12381348 ).si_then ([ctx, &ops] {
0 commit comments