Skip to content

Commit ef96bb0

Browse files
committed
rgw/cloud-restore: Support restoration of objects transitioned to Glacier/Tape endpoint
Restoration of objects from certain cloud services (like Glacier/Tape) can take a significant amount of time (even days). Hence, store the state of such restore requests and process them periodically. Brief summary of changes: * Refactored the existing restore code to consolidate and move all restore processing into the rgw_restore* files/class. * The RGWRestore class is defined to manage the restoration of objects. * Lastly, for SAL_RADOS, FIFO is used to store and read restore entries. Currently, this PR handles storing the state of restore requests sent to the cloud-glacier tier-type, which need async processing. The changes are tested with AWS Glacier Flexible Retrieval with tier_type Expedited and Standard. Reviewed-by: Matt Benjamin <[email protected]> Reviewed-by: Adam Emerson <[email protected]> Reviewed-by: Jiffin Tony Thottan <[email protected]> Reviewed-by: Daniel Gryniewicz <[email protected]> Signed-off-by: Soumya Koduri <[email protected]>
1 parent d570f85 commit ef96bb0

35 files changed

+1476
-237
lines changed

src/common/options/rgw.yaml.in

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -230,6 +230,7 @@ options:
230230
see_also:
231231
- rgw_enable_gc_threads
232232
- rgw_enable_lc_threads
233+
- rgw_enable_restore_threads
233234
with_legacy: true
234235
- name: rgw_enable_gc_threads
235236
type: bool
@@ -246,6 +247,7 @@ options:
246247
see_also:
247248
- rgw_enable_quota_threads
248249
- rgw_enable_lc_threads
250+
- rgw_enable_restore_threads
249251
with_legacy: true
250252
- name: rgw_enable_lc_threads
251253
type: bool
@@ -263,6 +265,24 @@ options:
263265
see_also:
264266
- rgw_enable_gc_threads
265267
- rgw_enable_quota_threads
268+
- rgw_enable_restore_threads
269+
with_legacy: true
270+
- name: rgw_enable_restore_threads
271+
type: bool
272+
level: advanced
273+
desc: Enables the objects' restore maintenance thread.
274+
long_desc: The objects restore maintenance thread is responsible for all the objects
275+
restoration related maintenance work. The thread itself can be disabled, but in order
276+
for the restore from the cloud to work correctly, at least one RGW in each zone needs
277+
to have this thread running. Having the thread enabled on multiple RGW processes within
278+
the same zone can spread some of the maintenance work between them.
279+
default: true
280+
services:
281+
- rgw
282+
see_also:
283+
- rgw_enable_gc_threads
284+
- rgw_enable_quota_threads
285+
- rgw_enable_lc_threads
266286
with_legacy: true
267287
- name: rgw_data
268288
type: str
@@ -475,6 +495,35 @@ options:
475495
services:
476496
- rgw
477497
with_legacy: true
498+
- name: rgw_restore_max_objs
499+
type: int
500+
level: advanced
501+
desc: Number of shards for restore processing
502+
long_desc: Number of RADOS objects to use for storing restore entries which are in progress. This affects concurrency of restore maintenance, as shards can be processed in parallel.
503+
default: 32
504+
services:
505+
- rgw
506+
with_legacy: true
507+
- name: rgw_restore_lock_max_time
508+
type: int
509+
level: dev
510+
default: 90
511+
services:
512+
- rgw
513+
see_also:
514+
with_legacy: true
515+
- name: rgw_restore_processor_period
516+
type: int
517+
level: advanced
518+
desc: Restore cycle run time
519+
long_desc: The amount of time between the start of consecutive runs of the restore
520+
processing threads. If a thread run takes longer than this period, it will
521+
not wait before running again.
522+
fmt_desc: The cycle time for restore state processing.
523+
default: 15_min
524+
services:
525+
- rgw
526+
with_legacy: true
478527
- name: rgw_mp_lock_max_time
479528
type: int
480529
level: advanced
@@ -1270,6 +1319,14 @@ options:
12701319
services:
12711320
- rgw
12721321
with_legacy: true
1322+
- name: rgw_nfs_run_restore_threads
1323+
type: bool
1324+
level: advanced
1325+
desc: run objects' restore threads in librgw (default off)
1326+
default: false
1327+
services:
1328+
- rgw
1329+
with_legacy: true
12731330
- name: rgw_nfs_run_sync_thread
12741331
type: bool
12751332
level: advanced

src/common/subsys.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,7 @@ SUBSYS(rgw_access, 1, 5)
6666
SUBSYS(rgw_dbstore, 1, 5)
6767
SUBSYS(rgw_flight, 1, 5)
6868
SUBSYS(rgw_lifecycle, 1, 5)
69+
SUBSYS(rgw_restore, 1, 5)
6970
SUBSYS(rgw_notification, 1, 5)
7071
SUBSYS(javaclient, 1, 5)
7172
SUBSYS(asok, 1, 5)

src/rgw/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,7 @@ set(librgw_common_srcs
6666
rgw_ldap.cc
6767
rgw_lc.cc
6868
rgw_lc_s3.cc
69+
rgw_restore.cc
6970
rgw_metadata.cc
7071
rgw_multi.cc
7172
rgw_multi_del.cc

src/rgw/driver/daos/rgw_sal_daos.cc

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1028,15 +1028,13 @@ int DaosObject::transition_to_cloud(
10281028

10291029
int DaosObject::restore_obj_from_cloud(Bucket* bucket,
10301030
rgw::sal::PlacementTier* tier,
1031-
rgw_placement_rule& placement_rule,
1032-
rgw_bucket_dir_entry& o,
10331031
CephContext* cct,
10341032
RGWObjTier& tier_config,
10351033
uint64_t olh_epoch,
10361034
std::optional<uint64_t> days,
1035+
bool& in_progress,
10371036
const DoutPrefixProvider* dpp,
1038-
optional_yield y,
1039-
uint32_t flags)
1037+
optional_yield y)
10401038
{
10411039
return DAOS_NOT_IMPLEMENTED_LOG(dpp);
10421040
}
@@ -2321,6 +2319,12 @@ std::unique_ptr<Lifecycle> DaosStore::get_lifecycle(void) {
23212319
return 0;
23222320
}
23232321

2322+
std::unique_ptr<Restore> DaosStore::get_restore(const int n_objs,
2323+
const std::vector<std::string_view>& obj_names) {
2324+
DAOS_NOT_IMPLEMENTED_LOG(nullptr);
2325+
return 0;
2326+
}
2327+
23242328
bool DaosStore::process_expired_objects(const DoutPrefixProvider *dpp,
23252329
optional_yield y) {
23262330
DAOS_NOT_IMPLEMENTED_LOG(nullptr);

src/rgw/driver/daos/rgw_sal_daos.h

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -654,15 +654,13 @@ class DaosObject : public StoreObject {
654654
optional_yield y) override;
655655
virtual int restore_obj_from_cloud(Bucket* bucket,
656656
rgw::sal::PlacementTier* tier,
657-
rgw_placement_rule& placement_rule,
658-
rgw_bucket_dir_entry& o,
659657
CephContext* cct,
660658
RGWObjTier& tier_config,
661659
uint64_t olh_epoch,
662660
std::optional<uint64_t> days,
661+
bool& in_progress,
663662
const DoutPrefixProvider* dpp,
664-
optional_yield y,
665-
uint32_t flags) override;
663+
optional_yield y) override;
666664
virtual bool placement_rules_match(rgw_placement_rule& r1,
667665
rgw_placement_rule& r2) override;
668666
virtual int dump_obj_layout(const DoutPrefixProvider* dpp, optional_yield y,
@@ -937,6 +935,8 @@ class DaosStore : public StoreDriver {
937935
virtual std::string zone_unique_trans_id(const uint64_t unique_num) override;
938936
virtual int cluster_stat(RGWClusterStat& stats) override;
939937
virtual std::unique_ptr<Lifecycle> get_lifecycle(void) override;
938+
virtual std::unique_ptr<Restore> get_restore(const int n_objs,
939+
const std::vector<std::string_view>& obj_names) override;
940940
virtual bool process_expired_objects(const DoutPrefixProvider *dpp, optional_yield y) override;
941941
virtual std::unique_ptr<Notification> get_notification(
942942
rgw::sal::Object* obj, rgw::sal::Object* src_obj, struct req_state* s,
@@ -953,6 +953,7 @@ class DaosStore : public StoreDriver {
953953
std::string& _req_id,
954954
optional_yield y) override;
955955
virtual RGWLC* get_rgwlc(void) override { return NULL; }
956+
virtual RGWRestore* get_rgwrestore(void) override { return NULL; }
956957
virtual RGWCoroutinesManagerRegistry* get_cr_registry() override {
957958
return NULL;
958959
}

src/rgw/driver/motr/rgw_sal_motr.cc

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3335,6 +3335,11 @@ std::unique_ptr<Lifecycle> MotrStore::get_lifecycle(void)
33353335
return 0;
33363336
}
33373337

3338+
std::unique_ptr<Restore> MotrStore::get_restore(const int n_objs,
3339+
const std::vector<std::string_view>& obj_names) {
3340+
return 0;
3341+
}
3342+
33383343
bool MotrStore::process_expired_objects(const DoutPrefixProvider *dpp,
33393344
optional_yield y)
33403345
{

src/rgw/driver/motr/rgw_sal_motr.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1006,6 +1006,8 @@ class MotrStore : public StoreDriver {
10061006
virtual int list_all_zones(const DoutPrefixProvider* dpp, std::list<std::string>& zone_ids) override;
10071007
virtual int cluster_stat(RGWClusterStat& stats) override;
10081008
virtual std::unique_ptr<Lifecycle> get_lifecycle(void) override;
1009+
virtual std::unique_ptr<Restore> get_restore(const int n_objs,
1010+
const std::vector<std::string_view>& obj_names) override;
10091011
virtual bool process_expired_objects(const DoutPrefixProvider *dpp, optional_yield y) override;
10101012
virtual std::unique_ptr<Notification> get_notification(rgw::sal::Object* obj, rgw::sal::Object* src_obj,
10111013
req_state* s, rgw::notify::EventType event_type, optional_yield y, const std::string* object_name=nullptr) override;
@@ -1020,6 +1022,7 @@ class MotrStore : public StoreDriver {
10201022
std::string& _req_id,
10211023
optional_yield y) override;
10221024
virtual RGWLC* get_rgwlc(void) override { return NULL; }
1025+
virtual RGWRestore* get_rgwrestore(void) override { return NULL; }
10231026
virtual RGWCoroutinesManagerRegistry* get_cr_registry() override { return NULL; }
10241027

10251028
virtual int log_usage(const DoutPrefixProvider *dpp, std::map<rgw_user_bucket, RGWUsageBatch>& usage_info) override;

src/rgw/driver/posix/rgw_sal_posix.cc

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3071,15 +3071,11 @@ int POSIXObject::transition_to_cloud(Bucket* bucket,
30713071

30723072
int POSIXObject::restore_obj_from_cloud(Bucket* bucket,
30733073
rgw::sal::PlacementTier* tier,
3074-
rgw_placement_rule& placement_rule,
3075-
rgw_bucket_dir_entry& o,
30763074
CephContext* cct,
3077-
RGWObjTier& tier_config,
3078-
uint64_t olh_epoch,
30793075
std::optional<uint64_t> days,
3076+
bool& in_progress,
30803077
const DoutPrefixProvider* dpp,
3081-
optional_yield y,
3082-
uint32_t flags)
3078+
optional_yield y)
30833079
{
30843080
return -ERR_NOT_IMPLEMENTED;
30853081
}

src/rgw/driver/posix/rgw_sal_posix.h

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -697,15 +697,11 @@ class POSIXObject : public StoreObject {
697697
optional_yield y) override;
698698
virtual int restore_obj_from_cloud(Bucket* bucket,
699699
rgw::sal::PlacementTier* tier,
700-
rgw_placement_rule& placement_rule,
701-
rgw_bucket_dir_entry& o,
702700
CephContext* cct,
703-
RGWObjTier& tier_config,
704-
uint64_t olh_epoch,
705701
std::optional<uint64_t> days,
702+
bool& in_progress,
706703
const DoutPrefixProvider* dpp,
707-
optional_yield y,
708-
uint32_t flags) override;
704+
optional_yield y) override;
709705
virtual bool placement_rules_match(rgw_placement_rule& r1, rgw_placement_rule& r2) override;
710706
virtual int dump_obj_layout(const DoutPrefixProvider *dpp, optional_yield y, Formatter* f) override;
711707
virtual int swift_versioning_restore(const ACLOwner& owner, const rgw_user& remote_user, bool& restored,

src/rgw/driver/rados/rgw_lc_tier.cc

Lines changed: 21 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -260,6 +260,7 @@ int rgw_cloud_tier_restore_object(RGWLCCloudTierCtx& tier_ctx,
260260
uint64_t& accounted_size, rgw::sal::Attrs& attrs,
261261
std::optional<uint64_t> days,
262262
RGWZoneGroupTierS3Glacier& glacier_params,
263+
bool& in_progress,
263264
void* cb) {
264265
RGWRESTConn::get_obj_params req_params;
265266
std::string target_obj_name;
@@ -276,25 +277,23 @@ int rgw_cloud_tier_restore_object(RGWLCCloudTierCtx& tier_ctx,
276277
target_obj_name += get_key_instance(tier_ctx.obj->get_key());
277278
}
278279

279-
if (glacier_params.glacier_restore_tier_type != GlacierRestoreTierType::Expedited) {
280-
//XXX: Supporting STANDARD tier type is still in WIP
281-
ldpp_dout(tier_ctx.dpp, -1) << __func__ << "ERROR: Only Expedited tier_type is supported " << dendl;
282-
return -1;
283-
}
280+
if (!in_progress) { // first time. Send RESTORE req.
284281

285-
rgw_obj dest_obj(dest_bucket, rgw_obj_key(target_obj_name));
282+
rgw_obj dest_obj(dest_bucket, rgw_obj_key(target_obj_name));
283+
ret = cloud_tier_restore(tier_ctx.dpp, tier_ctx.conn, dest_obj, days, glacier_params);
286284

287-
ret = cloud_tier_restore(tier_ctx.dpp, tier_ctx.conn, dest_obj, days, glacier_params);
288-
289-
ldpp_dout(tier_ctx.dpp, 20) << __func__ << "Restoring object=" << dest_obj << "returned ret = " << ret << dendl;
290-
291-
if (ret < 0 ) {
292-
ldpp_dout(tier_ctx.dpp, -1) << __func__ << "ERROR: failed to restore object=" << dest_obj << "; ret = " << ret << dendl;
293-
return ret;
285+
ldpp_dout(tier_ctx.dpp, 20) << __func__ << "Restoring object=" << target_obj_name << "returned ret = " << ret << dendl;
286+
287+
if (ret < 0 ) {
288+
ldpp_dout(tier_ctx.dpp, -1) << __func__ << "ERROR: failed to restore object=" << dest_obj << "; ret = " << ret << dendl;
289+
return ret;
290+
}
291+
in_progress = true;
294292
}
295293

296294
// now send HEAD request and verify if restore is complete on glacier/tape endpoint
297-
bool restore_in_progress = false;
295+
static constexpr int MAX_RETRIES = 10;
296+
uint32_t retries = 0;
298297
do {
299298
ret = rgw_cloud_tier_get_object(tier_ctx, true, headers, nullptr, etag,
300299
accounted_size, attrs, nullptr);
@@ -304,8 +303,14 @@ int rgw_cloud_tier_restore_object(RGWLCCloudTierCtx& tier_ctx,
304303
return ret;
305304
}
306305

307-
restore_in_progress = is_restore_in_progress(tier_ctx.dpp, headers);
308-
} while(restore_in_progress);
306+
in_progress = is_restore_in_progress(tier_ctx.dpp, headers);
307+
308+
} while(retries++ < MAX_RETRIES && in_progress);
309+
310+
if (in_progress) {
311+
ldpp_dout(tier_ctx.dpp, 20) << __func__ << "Restoring object=" << target_obj_name << " still in progress; returning " << dendl;
312+
return 0;
313+
}
309314

310315
// now do the actual GET
311316
ret = rgw_cloud_tier_get_object(tier_ctx, false, headers, pset_mtime, etag,

0 commit comments

Comments
 (0)