Skip to content

Commit 29d6eac

Browse files
authored
Merge pull request ceph#60916 from shashalu/fix-data-corruption-ETIMEDOUT
rgw: fix data corruption when rados op return ETIMEDOUT Reviewed-by: Casey Bodley <[email protected]>
2 parents 51f4a96 + 0c578a1 commit 29d6eac

File tree

2 files changed

+20
-5
lines changed

2 files changed

+20
-5
lines changed

src/rgw/driver/rados/rgw_putobj_processor.cc

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -597,6 +597,11 @@ int MultipartObjectProcessor::complete(
597597
}
598598

599599
if (r < 0) {
600+
if (r == -ETIMEDOUT) {
601+
// The meta_obj_ref write may eventually succeed, so clear the set of objects for deletion. If it
602+
// doesn't ever succeed, we'll orphan any tail objects as if we'd crashed before that write
603+
writer.clear_written();
604+
}
600605
return r == -ENOENT ? -ERR_NO_SUCH_UPLOAD : r;
601606
}
602607

@@ -783,6 +788,11 @@ int AppendObjectProcessor::complete(
783788
attrs, rctx, writer.get_trace(),
784789
flags & rgw::sal::FLAG_LOG_OP);
785790
if (r < 0) {
791+
if (r == -ETIMEDOUT) {
792+
// The head object write may eventually succeed, so clear the set of objects for deletion. If it
793+
// doesn't ever succeed, we'll orphan any tail objects as if we'd crashed before that write
794+
writer.clear_written();
795+
}
786796
return r;
787797
}
788798
if (!obj_op.meta.canceled) {

src/rgw/driver/rados/rgw_rados.cc

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3390,12 +3390,17 @@ int RGWRados::Object::Write::_do_write_meta(uint64_t size, uint64_t accounted_si
33903390
return 0;
33913391

33923392
done_cancel:
3393-
int ret = index_op->cancel(rctx.dpp, meta.remove_objs, rctx.y, log_op);
3394-
if (ret < 0) {
3395-
ldpp_dout(rctx.dpp, 0) << "ERROR: index_op.cancel() returned ret=" << ret << dendl;
3396-
}
3393+
// if r == -ETIMEDOUT, rgw can't determine whether or not the rados op succeeded
3394+
// we shouldn't be calling index_op->cancel() in this case
3395+
// Instead, we should leave that pending entry in the index so that bucket listing can recover with check_disk_state() and cls_rgw_suggest_changes().
3396+
if (r != -ETIMEDOUT) {
3397+
int ret = index_op->cancel(rctx.dpp, meta.remove_objs, rctx.y, log_op);
3398+
if (ret < 0) {
3399+
ldpp_dout(rctx.dpp, 0) << "ERROR: index_op.cancel() returned ret=" << ret << dendl;
3400+
}
33973401

3398-
meta.canceled = true;
3402+
meta.canceled = true;
3403+
}
33993404

34003405
/* we lost in a race. There are a few options:
34013406
* - existing object was rewritten (ECANCELED)

0 commit comments

Comments
 (0)