Skip to content

Commit 480253e

Browse files
author
Kritik Sachdeva
committed
mgr/rgw: Adding a retry config while calling zone_create()
Fixes https://tracker.ceph.com/issues/66750 Signed-off-by: Kritik Sachdeva <[email protected]>
1 parent 3b3242c commit 480253e

File tree

2 files changed

+34
-5
lines changed

2 files changed

+34
-5
lines changed

src/pybind/mgr/rgw/module.py

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,14 @@ def wrapper(self: Any, *args: Any, **kwargs: Any) -> HandleCommandResult:
101101

102102

103103
class Module(orchestrator.OrchestratorClientMixin, MgrModule):
104-
MODULE_OPTIONS: List[Option] = []
104+
MODULE_OPTIONS: List[Option] = [
105+
Option(
106+
'secondary_zone_period_retry_limit',
107+
type='int',
108+
default=5,
109+
desc='RGW module period update retry limit for secondary site'
110+
),
111+
]
105112

106113
# These are "native" Ceph options that this module cares about.
107114
NATIVE_OPTIONS: List[Option] = []
@@ -115,6 +122,9 @@ def __init__(self, *args: Any, **kwargs: Any):
115122
# ensure config options members are initialized; see config_notify()
116123
self.config_notify()
117124

125+
if TYPE_CHECKING:
126+
self.secondary_zone_period_retry_limit = 5
127+
118128
with self.lock:
119129
self.inited = True
120130
self.env = EnvArgs(RGWAMOrchMgr(self))
@@ -312,7 +322,7 @@ def _cmd_rgw_zone_create(self,
312322

313323
try:
314324
created_zones = self.rgw_zone_create(zone_name, realm_token, port, placement,
315-
start_radosgw, zone_endpoints, inbuf)
325+
start_radosgw, zone_endpoints, self.secondary_zone_period_retry_limit, inbuf)
316326
return HandleCommandResult(retval=0, stdout=f"Zones {', '.join(created_zones)} created successfully")
317327
except RGWAMException as e:
318328
return HandleCommandResult(retval=e.retcode, stderr=f'Failed to create zone: {str(e)}')
@@ -324,6 +334,7 @@ def rgw_zone_create(self,
324334
placement: Optional[Union[str, Dict[str, Any]]] = None,
325335
start_radosgw: Optional[bool] = True,
326336
zone_endpoints: Optional[str] = None,
337+
secondary_zone_period_retry_limit: Optional[int] = None,
327338
inbuf: Optional[str] = None) -> List[str]:
328339

329340
if inbuf:
@@ -350,7 +361,7 @@ def rgw_zone_create(self,
350361
try:
351362
created_zones = []
352363
for rgw_spec in rgw_specs:
353-
RGWAM(self.env).zone_create(rgw_spec, start_radosgw)
364+
RGWAM(self.env).zone_create(rgw_spec, start_radosgw, secondary_zone_period_retry_limit)
354365
if rgw_spec.rgw_zone is not None:
355366
created_zones.append(rgw_spec.rgw_zone)
356367
return created_zones

src/python-common/ceph/rgw/rgwam_core.py

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -786,7 +786,7 @@ def get_realms_info(self):
786786
"secret": secret})
787787
return realms_info
788788

789-
def zone_create(self, rgw_spec, start_radosgw):
789+
def zone_create(self, rgw_spec, start_radosgw, secondary_zone_period_retry_limit=5):
790790

791791
if not rgw_spec.rgw_realm_token:
792792
raise RGWAMException('missing realm token')
@@ -823,7 +823,25 @@ def zone_create(self, rgw_spec, start_radosgw):
823823
zone = self.create_zone(realm, zonegroup, rgw_spec.rgw_zone,
824824
False, # secondary zone
825825
access_key, secret, endpoints=rgw_spec.zone_endpoints)
826-
self.update_period(realm, zonegroup, zone)
826+
827+
# Adding a retry limit for period update in case the default 10s timeout is not sufficient
828+
rgw_limit = 0
829+
830+
while rgw_limit != int(secondary_zone_period_retry_limit):
831+
try:
832+
self.update_period(realm, zonegroup, zone)
833+
break
834+
except RGWAMException as e:
835+
logging.info(f'Failed to update Period in 10s. Retrying with current limit \
836+
& retry-limit values {rgw_limit} {secondary_zone_period_retry_limit}')
837+
rgw_limit += 1
838+
if rgw_limit == secondary_zone_period_retry_limit:
839+
raise RGWAMException(f'Period Update failed for zone {zone}. \
840+
Exception raised while period update {e.message}')
841+
continue
842+
843+
# By default the above operation is expected to complete within the 10s timeout, but when
844+
# updating the period for a secondary site it can take longer because of pool creation.
827845

828846
period = RGWPeriod(period_info)
829847
logging.debug(period.to_json())

0 commit comments

Comments
 (0)