Skip to content

Commit 9974f51

Browse files
committed
rgw/cloud-restore: Handle failure with adding restore entry
If adding the restore entry to the FIFO fails, reset the `restore_status` of that object to "RestoreFailed" so that the restore can be retried by the end S3 user. Reviewed-by: Adam Emerson <[email protected]> Reviewed-by: Jiffin Tony Thottan <[email protected]> Signed-off-by: Soumya Koduri <[email protected]>
1 parent ef96bb0 commit 9974f51

File tree

3 files changed

+31
-19
lines changed

3 files changed

+31
-19
lines changed

src/rgw/rgw_op.cc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1080,15 +1080,15 @@ int handle_cloudtier_obj(req_state* s, const DoutPrefixProvider *dpp, rgw::sal::
10801080
}
10811081

10821082
op_ret = driver->get_rgwrestore()->restore_obj_from_cloud(s->bucket.get(),
1083-
s->object.get(), tier.get(), days, y);
1083+
s->object.get(), tier.get(), days, dpp, y);
10841084

10851085
if (op_ret < 0) {
10861086
ldpp_dout(dpp, 0) << "Restore of object " << s->object->get_key() << " failed" << op_ret << dendl;
10871087
s->err.message = "failed to restore object";
10881088
return op_ret;
10891089
}
10901090

1091-
ldpp_dout(dpp, 20) << "Restore of object " << s->object->get_key() << " succeed" << dendl;
1091+
ldpp_dout(dpp, 20) << "Restore of object " << s->object->get_key() << " initiated" << dendl;
10921092
/* Even if restore is complete the first read through request will return
10931093
* but actually downloaded object asynchronously.
10941094
*/

src/rgw/rgw_restore.cc

Lines changed: 26 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -193,7 +193,7 @@ void *RGWRestore::RestoreWorker::entry() {
193193
int r = 0;
194194
r = restore->process(this, null_yield);
195195
if (r < 0) {
196-
ldpp_dout(dpp, 0) << "ERROR: restore process() returned error r=" << r << dendl;
196+
ldpp_dout(dpp, -1) << "ERROR: restore process() returned error r=" << r << dendl;
197197
}
198198
if (restore->going_down())
199199
break;
@@ -316,14 +316,14 @@ int RGWRestore::process(int index, int max_secs, optional_yield y)
316316
}
317317
ret = sal_restore->trim_entries(this, y, index, marker);
318318
if (ret < 0) {
319-
ldpp_dout(this, 0) << "RGWRestore::process() failed to trim entries on "
319+
ldpp_dout(this, -1) << "ERROR: RGWRestore::process() failed to trim entries on "
320320
<< obj_names[index] << dendl;
321321
}
322322

323323
if (!r_entries.empty()) {
324324
ret = sal_restore->add_entries(this, y, index, r_entries);
325325
if (ret < 0) {
326-
ldpp_dout(this, 0) << "RGWRestore::process() failed to add entries on "
326+
ldpp_dout(this, -1) << "ERROR: RGWRestore::process() failed to add entries on "
327327
<< obj_names[index] << dendl;
328328
}
329329
}
@@ -358,7 +358,7 @@ int RGWRestore::process_restore_entry(RGWRestoreEntry& entry, optional_yield y)
358358
// bucket, obj, days, state=in_progress
359359
ret = driver->load_bucket(this, entry.bucket, &bucket, null_yield);
360360
if (ret < 0) {
361-
ldpp_dout(this, 0) << "Restore:get_bucket for " << bucket->get_name()
361+
ldpp_dout(this, -1) << "ERROR: Restore:get_bucket for " << bucket->get_name()
362362
<< " failed" << dendl;
363363
return ret;
364364
}
@@ -367,7 +367,7 @@ int RGWRestore::process_restore_entry(RGWRestoreEntry& entry, optional_yield y)
367367
ret = obj->load_obj_state(this, null_yield, true);
368368

369369
if (ret < 0) {
370-
ldpp_dout(this, 0) << "Restore:get_object for " << entry.obj_key
370+
ldpp_dout(this, 0) << "ERROR: Restore:get_object for " << entry.obj_key
371371
<< " failed" << dendl;
372372
return ret;
373373
}
@@ -385,7 +385,7 @@ int RGWRestore::process_restore_entry(RGWRestoreEntry& entry, optional_yield y)
385385
}
386386
if (restore_status == rgw::sal::RGWRestoreStatus::CloudRestored) {
387387
// XXX: Check if expiry-date needs to be update
388-
ldpp_dout(this, 20) << "Restore of object " << obj->get_key() << " already done" << dendl;
388+
ldpp_dout(this, 5) << "Restore of object " << obj->get_key() << " already done" << dendl;
389389
entry.status = rgw::sal::RGWRestoreStatus::CloudRestored;
390390
return 0;
391391
}
@@ -397,7 +397,7 @@ int RGWRestore::process_restore_entry(RGWRestoreEntry& entry, optional_yield y)
397397
ret = driver->get_zone()->get_zonegroup().get_placement_tier(target_placement, &tier);
398398

399399
if (ret < 0) {
400-
ldpp_dout(this, -1) << "failed to fetch tier placement handle, ret = " << ret << dendl;
400+
ldpp_dout(this, -1) << "ERROR: failed to fetch tier placement handle, ret = " << ret << dendl;
401401
return ret;
402402
} else {
403403
ldpp_dout(this, 20) << "getting tier placement handle cloud tier for " <<
@@ -426,10 +426,10 @@ int RGWRestore::process_restore_entry(RGWRestoreEntry& entry, optional_yield y)
426426
}
427427

428428
if (in_progress) {
429-
ldpp_dout(this, 20) << "Restore of object " << obj->get_key() << " still in progress" << dendl;
429+
ldpp_dout(this, 15) << "Restore of object " << obj->get_key() << " is still in progress" << dendl;
430430
entry.status = rgw::sal::RGWRestoreStatus::RestoreAlreadyInProgress;
431431
} else {
432-
ldpp_dout(this, 20) << "Restore of object " << obj->get_key() << " succeeded" << dendl;
432+
ldpp_dout(this, 15) << "Restore of object " << obj->get_key() << " succeeded" << dendl;
433433
entry.status = rgw::sal::RGWRestoreStatus::RestoreFailed;
434434
}
435435
return ret;
@@ -466,7 +466,9 @@ int RGWRestore::set_cloud_restore_status(const DoutPrefixProvider* dpp,
466466
int RGWRestore::restore_obj_from_cloud(rgw::sal::Bucket* pbucket,
467467
rgw::sal::Object* pobj,
468468
rgw::sal::PlacementTier* tier,
469-
std::optional<uint64_t> days, optional_yield y)
469+
std::optional<uint64_t> days,
470+
const DoutPrefixProvider* dpp,
471+
optional_yield y)
470472
{
471473
int ret = 0;
472474

@@ -484,20 +486,19 @@ int RGWRestore::restore_obj_from_cloud(rgw::sal::Bucket* pbucket,
484486

485487
// now go ahead with restoring object
486488
bool in_progress = false;
487-
ret = pobj->restore_obj_from_cloud(pbucket, tier, cct, days, in_progress, this, y);
489+
ret = pobj->restore_obj_from_cloud(pbucket, tier, cct, days, in_progress, dpp, y);
488490

489491
if (ret < 0) {
490-
ldpp_dout(this, 0) << "object " << pobj->get_key() << " fetching failed" << ret << dendl;
492+
ldpp_dout(this, 0) << "ERROR: object " << pobj->get_key() << " fetching failed" << ret << dendl;
491493
auto reset_ret = set_cloud_restore_status(this, pobj, y, rgw::sal::RGWRestoreStatus::RestoreFailed);
492494

493495
if (reset_ret < 0) {
494-
ldpp_dout(this, -1) << "Setting restore status ad RestoreFailed failed for object(" << pobj->get_key() << ") " << reset_ret << dendl;
496+
ldpp_dout(this, -1) << "Setting restore status to RestoreFailed failed for object(" << pobj->get_key() << ") " << reset_ret << dendl;
495497
}
496498

497499
return ret;
498500
}
499501

500-
ldpp_dout(this, 20) << "Restore of object " << pobj->get_key() << " succeeded" << dendl;
501502
if (in_progress) {
502503
// add restore entry to the list
503504
RGWRestoreEntry entry;
@@ -507,14 +508,23 @@ int RGWRestore::restore_obj_from_cloud(rgw::sal::Bucket* pbucket,
507508
entry.days = days;
508509
entry.zone_id = driver->get_zone()->get_id();
509510

511+
ldpp_dout(this, 10) << "RGWRestore:: Adding restore entry of object(" << pobj->get_key() << ") entry: " << entry << dendl;
512+
510513
int index = choose_oid(entry);
511514
ret = sal_restore->add_entry(this, y, index, entry);
512515

513516
if (ret < 0) {
514-
ldpp_dout(this, -1) << "Adding restore entry of object(" << pobj->get_key() << ") failed" << ret << dendl;
517+
ldpp_dout(this, -1) << "ERROR: Adding restore entry of object(" << pobj->get_key() << ") failed" << ret << dendl;
518+
519+
auto reset_ret = set_cloud_restore_status(this, pobj, y, rgw::sal::RGWRestoreStatus::RestoreFailed);
520+
if (reset_ret < 0) {
521+
ldpp_dout(this, -1) << "Setting restore status as RestoreFailed failed for object(" << pobj->get_key() << ") " << reset_ret << dendl;
522+
}
523+
524+
return ret;
515525
}
516526
}
517527

518-
ldpp_dout(this, 20) << "Restore of object " << pobj->get_key() << " succeeded" << dendl;
528+
ldpp_dout(this, 10) << "Restore of object " << pobj->get_key() << (in_progress ? " is in progress" : " succeeded") << dendl;
519529
return ret;
520530
}

src/rgw/rgw_restore.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -136,5 +136,7 @@ class RGWRestore : public DoutPrefixProvider {
136136
* to be processed later by RestoreWorker thread. */
137137
int restore_obj_from_cloud(rgw::sal::Bucket* pbucket, rgw::sal::Object* pobj,
138138
rgw::sal::PlacementTier* tier,
139-
std::optional<uint64_t> days, optional_yield y);
139+
std::optional<uint64_t> days,
140+
const DoutPrefixProvider* dpp,
141+
optional_yield y);
140142
};

0 commit comments

Comments
 (0)