Skip to content

Commit 71882f6

Browse files
committed
rgw/restore: Persistently store the restore state for cloud-s3 tier
In order to resume IN_PROGRESS restore operations post RGW service restarts, store the entries of the objects being restored from `cloud-s3` tier persistently. This is already being done for `cloud-s3-glacier` tier and now the same will be applied to `cloud-s3` tier too. With this change, when `restore-object` is performed on any object, it will be marked RESTORE_ALREADY_IN_PROGRESS and added to a restore FIFO queue. This queue is later processed by Restore worker thread which will try to fetch the objects from Cloud or Glacier/Tape S3 services. Hence all the restore operations are now handled asynchronously (for both `cloud-s3`, `cloud-s3-glacier` tiers). Signed-off-by: Soumya Koduri <[email protected]>
1 parent 4876523 commit 71882f6

File tree

2 files changed

+23
-37
lines changed

2 files changed

+23
-37
lines changed

src/rgw/driver/rados/rgw_rados.cc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5554,6 +5554,7 @@ int RGWRados::restore_obj_from_cloud(RGWLCCloudTierCtx& tier_ctx,
55545554
ret = rgw_cloud_tier_get_object(tier_ctx, false, headers,
55555555
&set_mtime, etag, accounted_size,
55565556
attrs, &cb);
5557+
in_progress = false;
55575558
}
55585559

55595560
if (ret < 0) {

src/rgw/rgw_restore.cc

Lines changed: 22 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -439,6 +439,7 @@ int Restore::process_restore_entry(RestoreEntry& entry, optional_yield y)
439439
using ceph::decode;
440440
decode(restore_status, iter);
441441
}
442+
// check if it's still in the in-progress state
442443
if (restore_status != rgw::sal::RGWRestoreStatus::RestoreAlreadyInProgress) {
443444
ldpp_dout(this, 5) << __PRETTY_FUNCTION__ << ": Restore of object " << obj->get_key()
444445
<< " not in progress state" << dendl;
@@ -625,52 +626,36 @@ int Restore::restore_obj_from_cloud(rgw::sal::Bucket* pbucket,
625626
return ret;
626627
}
627628

628-
// now go ahead with restoring object
629-
bool in_progress = false;
630-
ret = pobj->restore_obj_from_cloud(pbucket, tier, cct, days, in_progress, dpp, y);
629+
// now add the entry to the restore list to be processed by Restore worker thread
630+
// asynchronously
631+
RestoreEntry entry;
632+
entry.bucket = pbucket->get_key();
633+
entry.obj_key = pobj->get_key();
634+
entry.status = rgw::sal::RGWRestoreStatus::RestoreAlreadyInProgress;
635+
entry.days = days;
636+
entry.zone_id = driver->get_zone()->get_id();
637+
638+
ldpp_dout(this, 10) << "Restore:: Adding restore entry of object(" << pobj->get_key() << ") entry: " << entry << dendl;
639+
640+
int index = choose_oid(entry);
641+
ldpp_dout(this, 10) << __PRETTY_FUNCTION__ << ": Adding restore entry of object(" << pobj->get_key() << ") entry: " << entry << ", to shard:" << obj_names[index] << dendl;
642+
643+
std::vector<rgw::restore::RestoreEntry> r_entries;
644+
r_entries.push_back(entry);
645+
ret = sal_restore->add_entries(this, y, index, r_entries);
631646

632647
if (ret < 0) {
633-
ldpp_dout(this, -1) << __PRETTY_FUNCTION__ << ": ERROR: object " << pobj->get_key() << " fetching failed" << ret << dendl;
634-
auto reset_ret = set_cloud_restore_status(this, pobj, y, rgw::sal::RGWRestoreStatus::RestoreFailed);
648+
ldpp_dout(this, -1) << __PRETTY_FUNCTION__ << ": ERROR: Adding restore entry of object(" << pobj->get_key() << ") failed" << ret << dendl;
635649

650+
auto reset_ret = set_cloud_restore_status(this, pobj, y, rgw::sal::RGWRestoreStatus::RestoreFailed);
636651
if (reset_ret < 0) {
637-
ldpp_dout(this, -1) << __PRETTY_FUNCTION__ << ": Setting restore status to RestoreFailed failed for object(" << pobj->get_key() << ") " << reset_ret << dendl;
652+
ldpp_dout(this, -1) << __PRETTY_FUNCTION__ << ": Setting restore status as RestoreFailed failed for object(" << pobj->get_key() << ") " << reset_ret << dendl;
638653
}
639654

640655
return ret;
641656
}
642657

643-
if (in_progress) {
644-
// add restore entry to the list
645-
RestoreEntry entry;
646-
entry.bucket = pbucket->get_key();
647-
entry.obj_key = pobj->get_key();
648-
entry.status = rgw::sal::RGWRestoreStatus::RestoreAlreadyInProgress;
649-
entry.days = days;
650-
entry.zone_id = driver->get_zone()->get_id();
651-
652-
ldpp_dout(this, 10) << "Restore:: Adding restore entry of object(" << pobj->get_key() << ") entry: " << entry << dendl;
653-
654-
int index = choose_oid(entry);
655-
ldpp_dout(this, 10) << __PRETTY_FUNCTION__ << ": Adding restore entry of object(" << pobj->get_key() << ") entry: " << entry << ", to shard:" << obj_names[index] << dendl;
656-
657-
std::vector<rgw::restore::RestoreEntry> r_entries;
658-
r_entries.push_back(entry);
659-
ret = sal_restore->add_entries(this, y, index, r_entries);
660-
661-
if (ret < 0) {
662-
ldpp_dout(this, -1) << __PRETTY_FUNCTION__ << ": ERROR: Adding restore entry of object(" << pobj->get_key() << ") failed" << ret << dendl;
663-
664-
auto reset_ret = set_cloud_restore_status(this, pobj, y, rgw::sal::RGWRestoreStatus::RestoreFailed);
665-
if (reset_ret < 0) {
666-
ldpp_dout(this, -1) << __PRETTY_FUNCTION__ << ": Setting restore status as RestoreFailed failed for object(" << pobj->get_key() << ") " << reset_ret << dendl;
667-
}
668-
669-
return ret;
670-
}
671-
}
672-
673-
ldpp_dout(this, 10) << __PRETTY_FUNCTION__ << ": Restore of object " << pobj->get_key() << (in_progress ? " is in progress" : " succeeded") << dendl;
658+
ldpp_dout(this, 10) << __PRETTY_FUNCTION__ << ": Restore of object " << pobj->get_key() << " is in progress." << dendl;
674659
return ret;
675660
}
676661

0 commit comments

Comments
 (0)