Skip to content

Commit b4c51b0

Browse files
[DPE-2289] [DPE-2388] Upgrade from 14/stable and add integration tests (#210)
* Added initial upgrade implementation * Minor fixes * Adjusted code to correctly update the upgrade stack and speed up the unit initialisation * Minor fixes * Updated the code with the new library * Fixed upgrade logic to switch over to unit zero and added unit tests * Added backup creation check * Rollback dependencies * Fixed dependencies * Minor fixes * Final fixes * Removed upgrade integration tests * Pin charmcraft revision * Remove charmcraft pin * Removed hashes from requirements * Revert "Removed upgrade integration tests" This reverts commit c249b44. * Renamed overridden method * Upgraded test * Added replication health check and rock dependency * Comment test * Add additional upgrade logic for stable revision * Partially fix the upgrade from stable * Fix exporter startup * Fix metrics service start * Revert tests * Format * Lint * Add test for upgrade from stable * Add upgrade test * Minor fixes * Fix unit tests * Fix unit tests patches * PR feedback
1 parent b2dc69e commit b4c51b0

File tree

8 files changed

+543
-10
lines changed

8 files changed

+543
-10
lines changed

.github/workflows/ci.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,8 @@ jobs:
8686
- password-rotation-integration
8787
- plugins-integration
8888
- tls-integration
89+
- upgrade-integration
90+
- upgrade-from-stable-integration
8991
agent-versions:
9092
- "2.9.45" # renovate: latest juju 2
9193
- "3.1.5" # renovate: latest juju 3

src/charm.py

Lines changed: 24 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@
4040
MaintenanceStatus,
4141
Relation,
4242
SecretNotFoundError,
43+
Unit,
4344
WaitingStatus,
4445
)
4546
from ops.pebble import ChangeError, Layer, PathError, ProtocolError, ServiceStatus
@@ -163,6 +164,14 @@ def _generate_metrics_jobs(self, enable_tls: bool) -> Dict:
163164
},
164165
]
165166

167+
@property
168+
def app_units(self) -> set[Unit]:
169+
"""The peer-related units in the application."""
170+
if not self._peers:
171+
return set()
172+
173+
return {self.unit, *self._peers.units}
174+
166175
@property
167176
def app_peer_data(self) -> Dict:
168177
"""Application peer relation data object."""
@@ -541,21 +550,21 @@ def enable_disable_extensions(self, database: str = None) -> None:
541550
Args:
542551
database: optional database where to enable/disable the extension.
543552
"""
544-
orginial_status = self.unit.status
553+
original_status = self.unit.status
545554
for plugin in self.config.plugin_keys():
546555
enable = self.config[plugin]
547556
# Enable or disable the plugin/extension.
548557
extension = "_".join(plugin.split("_")[1:-1])
558+
self.unit.status = WaitingStatus(
559+
f"{'Enabling' if enable else 'Disabling'} {extension}"
560+
)
549561
try:
550-
self.unit.status = WaitingStatus(
551-
f"{'Enabling' if enable else 'Disabling'} {extension}"
552-
)
553562
self.postgresql.enable_disable_extension(extension, enable, database)
554-
self.unit.status = orginial_status
555563
except PostgreSQLEnableDisableExtensionError as e:
556564
logger.exception(
557565
f"failed to {'enable' if enable else 'disable'} {extension} plugin: %s", str(e)
558566
)
567+
self.unit.status = original_status
559568

560569
def _add_members(self, event) -> None:
561570
"""Add new cluster members.
@@ -1046,6 +1055,10 @@ def _on_stop(self, _):
10461055

10471056
def _on_update_status(self, _) -> None:
10481057
"""Update the unit status message."""
1058+
if not self.upgrade.idle:
1059+
logger.debug("Early exit on_update_status: upgrade in progress")
1060+
return
1061+
10491062
container = self.unit.get_container("postgresql")
10501063
if not container.can_connect():
10511064
logger.debug("on_update_status early exit: Cannot connect to container")
@@ -1204,7 +1217,11 @@ def _generate_metrics_service(self) -> Dict:
12041217
"override": "replace",
12051218
"summary": "postgresql metrics exporter",
12061219
"command": "/start-exporter.sh",
1207-
"startup": "enabled",
1220+
"startup": (
1221+
"enabled"
1222+
if self.get_secret("app", MONITORING_PASSWORD_KEY) is not None
1223+
else "disabled"
1224+
),
12081225
"after": [self._postgresql_service],
12091226
"user": WORKLOAD_OS_USER,
12101227
"group": WORKLOAD_OS_GROUP,
@@ -1415,6 +1432,7 @@ def update_config(self, is_creating_backup: bool = False) -> bool:
14151432
container.add_layer(
14161433
self._metrics_service,
14171434
Layer({"services": {self._metrics_service: self._generate_metrics_service()}}),
1435+
combine=True,
14181436
)
14191437
container.restart(self._metrics_service)
14201438

src/upgrade.py

Lines changed: 51 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,13 +14,15 @@
1414
from lightkube.core.client import Client
1515
from lightkube.core.exceptions import ApiError
1616
from lightkube.resources.apps_v1 import StatefulSet
17-
from ops.charm import WorkloadEvent
18-
from ops.model import BlockedStatus
17+
from ops.charm import UpgradeCharmEvent, WorkloadEvent
18+
from ops.model import BlockedStatus, MaintenanceStatus, RelationDataContent
1919
from pydantic import BaseModel
2020
from tenacity import RetryError, Retrying, stop_after_attempt, wait_fixed
2121
from typing_extensions import override
2222

23+
from constants import APP_SCOPE, MONITORING_PASSWORD_KEY, MONITORING_USER
2324
from patroni import SwitchoverFailedError
25+
from utils import new_password
2426

2527
logger = logging.getLogger(__name__)
2628

@@ -51,6 +53,7 @@ def __init__(self, charm, model: BaseModel, **kwargs) -> None:
5153
self.framework.observe(
5254
getattr(self.charm.on, "postgresql_pebble_ready"), self._on_postgresql_pebble_ready
5355
)
56+
self.framework.observe(self.charm.on.upgrade_charm, self._on_upgrade_charm_check_legacy)
5457

5558
@property
5659
def is_no_sync_member(self) -> bool:
@@ -80,7 +83,7 @@ def _on_postgresql_pebble_ready(self, event: WorkloadEvent) -> None:
8083
event.defer()
8184
return
8285

83-
if self.peer_relation.data[self.charm.unit].get("state") != "upgrading":
86+
if self.state not in ["upgrading", "recovery"]:
8487
return
8588

8689
# Don't mark the upgrade of this unit as completed until Patroni reports the
@@ -90,6 +93,15 @@ def _on_postgresql_pebble_ready(self, event: WorkloadEvent) -> None:
9093
event.defer()
9194
return
9295

96+
if self.charm.unit.is_leader():
97+
if not self.charm._patroni.primary_endpoint_ready:
98+
logger.debug(
99+
"Deferring on_pebble_ready: current unit is leader but primary endpoint is not ready yet"
100+
)
101+
event.defer()
102+
return
103+
self._set_up_new_credentials_for_legacy()
104+
93105
try:
94106
for attempt in Retrying(stop=stop_after_attempt(6), wait=wait_fixed(10)):
95107
with attempt:
@@ -120,6 +132,25 @@ def _on_upgrade_changed(self, _) -> None:
120132

121133
self.charm.update_config()
122134

135+
def _on_upgrade_charm_check_legacy(self, event: UpgradeCharmEvent) -> None:
136+
if not self.peer_relation:
137+
logger.debug("Wait all units join the upgrade relation")
138+
return
139+
140+
if self.state:
141+
# Do nothing - if the state is set, the upgrade is supported.
142+
return
143+
144+
logger.warning("Upgrading from unspecified version")
145+
146+
# All peers should set the state to upgrading.
147+
self.unit_upgrade_data.update({"state": "upgrading"})
148+
149+
if self.charm.unit.name != f"{self.charm.app.name}/{self.charm.app.planned_units() - 1}":
150+
self.charm.unit.status = MaintenanceStatus("upgrading unit")
151+
self.peer_relation.data[self.charm.unit].update({"state": "upgrading"})
152+
self._set_rolling_update_partition(self.charm.app.planned_units())
153+
123154
@override
124155
def pre_upgrade_check(self) -> None:
125156
"""Runs necessary checks validating the cluster is in a healthy state to upgrade.
@@ -220,3 +251,20 @@ def _set_first_rolling_update_partition(self) -> None:
220251
self._set_rolling_update_partition(self.charm.app.planned_units() - 1)
221252
except KubernetesClientError as e:
222253
raise ClusterNotReadyError(e.message, e.cause)
254+
255+
def _set_up_new_credentials_for_legacy(self) -> None:
256+
"""Create missing password and user."""
257+
if self.charm.get_secret(APP_SCOPE, MONITORING_PASSWORD_KEY) is None:
258+
self.charm.set_secret(APP_SCOPE, MONITORING_PASSWORD_KEY, new_password())
259+
users = self.charm.postgresql.list_users()
260+
if MONITORING_USER not in users:
261+
self.charm.postgresql.create_user(
262+
MONITORING_USER,
263+
self.charm.get_secret(APP_SCOPE, MONITORING_PASSWORD_KEY),
264+
extra_user_roles="pg_monitor",
265+
)
266+
267+
@property
268+
def unit_upgrade_data(self) -> RelationDataContent:
269+
"""Return the unit upgrade data."""
270+
return self.peer_relation.data[self.charm.unit]

0 commit comments

Comments
 (0)