Skip to content

Commit f5e3d9f

Browse files
authored
DPE-4200 Scale-up from zero (#414)
* (WIP) detect and treat scale up from zero * scale up from zero * bump outdated lib * fix base data_keys * postpone secondaries to after leader/primary
1 parent 6a36abc commit f5e3d9f

File tree

2 files changed

+58
-2
lines changed

2 files changed

+58
-2
lines changed

src/charm.py

Lines changed: 35 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -261,6 +261,19 @@ def unit_address(self) -> str:
261261
"""Return the address of this unit."""
262262
return self.get_unit_address()
263263

264+
@property
def is_new_unit(self) -> bool:
    """Report whether this unit's peer databag is still pristine.

    Juju pre-populates every unit's peer-relation bag with exactly the
    address bookkeeping keys below; any charm-written key means the unit
    has already taken part in cluster setup. A pristine bag therefore
    signals a fresh unit, e.g. when the app scales up from zero units.
    """
    juju_managed_keys = frozenset(
        ("egress-subnets", "ingress-address", "private-address")
    )
    # Equality (not subset): extra keys imply prior charm state.
    return set(self.unit_peer_data.keys()) == juju_managed_keys
276+
264277
def get_unit_hostname(self, unit_name: Optional[str] = None) -> str:
265278
"""Get the hostname.localdomain for a unit.
266279
@@ -424,8 +437,6 @@ def _reconcile_pebble_layer(self, container: Container) -> None:
424437
):
425438
container.stop(MYSQLD_EXPORTER_SERVICE)
426439

427-
self._on_update_status(None)
428-
429440
def _restart(self, event: EventBase) -> None:
430441
"""Restart the service."""
431442
if self.peers.units != self.restart_peers.units:
@@ -702,6 +713,21 @@ def _on_mysql_pebble_ready(self, event) -> None:
702713
self.unit.status = MaintenanceStatus("Starting mysqld")
703714
logger.info("Data directory is already initialised, skipping configuration")
704715
self._reconcile_pebble_layer(container)
716+
if self.is_new_unit:
717+
# when unit is new and has data, it means the app is scaling out
718+
# from zero units
719+
logger.info("Scaling out from zero units")
720+
if self.unit.is_leader():
721+
# create the cluster due to it being dissolved on scale-down
722+
self.create_cluster()
723+
self._on_update_status(None)
724+
else:
725+
# Non-leader units try to join cluster
726+
self.unit.status = WaitingStatus("Waiting for instance to join the cluster")
727+
self.unit_peer_data.update({
728+
"member-role": "secondary",
729+
"member-state": "waiting",
730+
})
705731
return
706732

707733
self.unit.status = MaintenanceStatus("Initialising mysqld")
@@ -845,10 +871,12 @@ def _on_update_status(self, _: Optional[UpdateStatusEvent]) -> None:
845871
if not self.unit.is_leader() and self._is_unit_waiting_to_join_cluster():
846872
# join cluster test takes precedence over blocked test
847873
# due to matching criteria
874+
logger.info("Attempting to join cluster")
848875
self.join_unit_to_cluster()
849876
return
850877

851878
if self._is_cluster_blocked():
879+
logger.info("Cluster is blocked. Skipping.")
852880
return
853881
del self.restart_peers.data[self.unit]["state"]
854882

@@ -926,6 +954,11 @@ def _on_database_storage_detaching(self, _) -> None:
926954
# Inform other hooks of current status
927955
self.unit_peer_data["unit-status"] = "removing"
928956

957+
if self.unit.is_leader():
958+
# Update 'units-added-to-cluster' counter in the peer relation databag
959+
units = int(self.app_peer_data.get("units-added-to-cluster", 1))
960+
self.app_peer_data["units-added-to-cluster"] = str(units - 1)
961+
929962

930963
if __name__ == "__main__":
931964
main(MySQLOperatorCharm)

tests/integration/test_charm.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -201,6 +201,29 @@ async def test_scale_up_after_scale_down(ops_test: OpsTest) -> None:
201201
assert len(online_member_addresses) == 3
202202

203203

204+
@pytest.mark.group(1)
@pytest.mark.abort_on_fail
async def test_scale_up_from_zero(ops_test: OpsTest) -> None:
    """Ensure scaling down to zero and back up works."""
    # Remove every unit and wait until the model reports an empty application.
    await scale_application(ops_test, APP_NAME, 0)
    await ops_test.model.block_until(
        lambda: not ops_test.model.applications[APP_NAME].units,
        timeout=TIMEOUT,
    )

    # Scale back out; a healthy three-member cluster must re-form.
    await scale_application(ops_test, APP_NAME, 3)

    # Any unit can answer the cluster-status query.
    unit = ops_test.model.applications[APP_NAME].units[0]
    status = await get_cluster_status(unit)
    topology = status["defaultreplicaset"]["topology"]
    online = [m["address"] for m in topology.values() if m["status"] == "online"]
    assert len(online) == 3
225+
226+
204227
@pytest.mark.group(1)
205228
@pytest.mark.abort_on_fail
206229
async def test_password_rotation(ops_test: OpsTest):

0 commit comments

Comments
 (0)