Skip to content

Commit 85f5cfd

Browse files
Add in-place upgrades (#138)
Based on specification: [DA058 - In-Place Upgrades - Kubernetes v2](https://docs.google.com/document/d/1tLjknwHudjcHs42nzPVBNkHs98XxAOT2BXGGpP7NyEU/edit)
1 parent 0f7920f commit 85f5cfd

25 files changed

+608
-61
lines changed

.github/workflows/ci.yaml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,13 @@ jobs:
100100
run: |
101101
pipx install tox
102102
pipx install poetry
103+
- name: Free up disk space
104+
run: |
105+
# From https://github.com/actions/runner-images/issues/2840#issuecomment-790492173
106+
sudo rm -rf /usr/share/dotnet
107+
sudo rm -rf /opt/ghc
108+
sudo rm -rf /usr/local/share/boost
109+
sudo rm -rf "$AGENT_TOOLSDIRECTORY"
103110
- name: Setup operator environment
104111
uses: charmed-kubernetes/actions-operator@main
105112
with:

CONTRIBUTING.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,8 +66,9 @@ juju add-model dev
6666
# Enable DEBUG logging
6767
juju model-config logging-config="<root>=INFO;unit=DEBUG"
6868
# Deploy the charm
69+
# `--trust` is needed if Role-Based Access Control (RBAC) (https://kubernetes.io/docs/concepts/security/rbac-good-practices/) is enabled on Kubernetes
6970
juju deploy ./mysqlrouter-operator_ubuntu-20.04-amd64.charm \
70-
--resource mysql-router-image=mysql/mysql-router:8.0
71+
--resource mysql-router-image=mysql/mysql-router:8.0 --trust
7172
```
7273

7374
## Canonical Contributor Agreement

README.md

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,12 +14,12 @@ flowchart TD
1414

1515
## Usage
1616

17-
This charm must be used coupled with mysql-operator charm, through a relation, e.g.:
17+
This charm must be used together with the mysql-k8s charm, through a relation, e.g.:
1818

1919
```bash
20-
juju deploy mysql-operator
21-
juju deploy mysql-router-operator
22-
juju add-relation mysql-operator mysql-router-operator
20+
juju deploy mysql-k8s --trust
21+
juju deploy mysql-router-k8s --trust
22+
juju integrate mysql-k8s mysql-router-k8s
2323
```
2424

2525
## Relations

actions.yaml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,13 @@
11
# Copyright 2023 Canonical Ltd.
22
# See LICENSE file for licensing details.
33

4+
resume-upgrade:
5+
description: Upgrade remaining units (after you have manually verified that the upgraded units are healthy).
6+
params:
7+
force:
8+
type: boolean
9+
description: Force upgrade of next unit if an upgraded unit has non-active status.
10+
required: []
411
set-tls-private-key:
512
description:
613
Set the private key, which will be used for certificate signing requests (CSR). Run

charm_version

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
1

charmcraft.yaml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,9 @@ parts:
1919
exit 1
2020
fi
2121
charm-entrypoint: src/kubernetes_charm.py
22+
prime:
23+
- charm_version
24+
- workload_version
2225
build-packages:
2326
- libffi-dev
2427
- libssl-dev

metadata.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,11 @@ requires:
3232
interface: tls-certificates
3333
limit: 1
3434
peers:
35+
upgrade-version-a:
36+
# Relation versioning scheme:
37+
# DA056 - Upgrading in-place upgrade protocol
38+
# https://docs.google.com/document/d/1H7qy5SAwLiCOKO9xMQJbbQP5_-jGV6Lhi-mJOk4gZ08/edit
39+
interface: upgrade
3540
mysql-router-peers:
3641
interface: mysql_router_peers
3742
resources:

poetry.lock

Lines changed: 12 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ python = "^3.10"
1313
ops = "^2.6.0"
1414
lightkube = "^0.14.0"
1515
tenacity = "^8.2.3"
16+
poetry-core = "^1.7.0"
1617

1718
[tool.poetry.group.charm-libs.dependencies]
1819
# data_platform_libs/v0/data_interfaces.py

src/abstract_charm.py

Lines changed: 74 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
import lifecycle
1616
import relations.database_provides
1717
import relations.database_requires
18+
import upgrade
1819
import workload
1920

2021
logger = logging.getLogger(__name__)
@@ -34,11 +35,21 @@ def __init__(self, *args) -> None:
3435
self._authenticated_workload_type = workload.AuthenticatedWorkload
3536
self._database_requires = relations.database_requires.RelationEndpoint(self)
3637
self._database_provides = relations.database_provides.RelationEndpoint(self)
37-
self.framework.observe(self.on.update_status, self.reconcile_database_relations)
38-
# Set status on first start if no relations active
39-
self.framework.observe(self.on.start, self.reconcile_database_relations)
38+
self.framework.observe(self.on.update_status, self.reconcile)
39+
self.framework.observe(
40+
self.on[upgrade.PEER_RELATION_ENDPOINT_NAME].relation_changed, self.reconcile
41+
)
42+
self.framework.observe(
43+
self.on[upgrade.RESUME_ACTION_NAME].action, self._on_resume_upgrade_action
44+
)
45+
# (For Kubernetes) Reset partition after scale down
46+
self.framework.observe(
47+
self.on[upgrade.PEER_RELATION_ENDPOINT_NAME].relation_departed, self.reconcile
48+
)
49+
# Handle upgrade & set status on first start if no relations active
50+
self.framework.observe(self.on.start, self.reconcile)
4051
# Update app status
41-
self.framework.observe(self.on.leader_elected, self.reconcile_database_relations)
52+
self.framework.observe(self.on.leader_elected, self.reconcile)
4253

4354
@property
4455
@abc.abstractmethod
@@ -59,6 +70,11 @@ def _tls_certificate_saved(self) -> bool:
5970
def _container(self) -> container.Container:
6071
"""Workload container (snap or ROCK)"""
6172

73+
@property
74+
@abc.abstractmethod
75+
def _upgrade(self) -> typing.Optional[upgrade.Upgrade]:
76+
pass
77+
6278
@property
6379
@abc.abstractmethod
6480
def _read_write_endpoint(self) -> str:
@@ -88,8 +104,8 @@ def _prioritize_statuses(statuses: typing.List[ops.StatusBase]) -> ops.StatusBas
88104
"""
89105
status_priority = (
90106
ops.BlockedStatus,
91-
ops.WaitingStatus,
92107
ops.MaintenanceStatus,
108+
ops.WaitingStatus,
93109
# Catch any unknown status type
94110
ops.StatusBase,
95111
)
@@ -101,6 +117,11 @@ def _prioritize_statuses(statuses: typing.List[ops.StatusBase]) -> ops.StatusBas
101117

102118
def _determine_app_status(self, *, event) -> ops.StatusBase:
103119
"""Report app status."""
120+
if self._upgrade and (upgrade_status := self._upgrade.app_status):
121+
# Upgrade status should take priority over relation status—even if the status level is
122+
# normally lower priority.
123+
# (Relations should not be modified during upgrade.)
124+
return upgrade_status
104125
statuses = []
105126
for endpoint in (self._database_requires, self._database_provides):
106127
if status := endpoint.get_status(event):
@@ -111,16 +132,19 @@ def _determine_unit_status(self, *, event) -> ops.StatusBase:
111132
"""Report unit status."""
112133
statuses = []
113134
workload_ = self.get_workload(event=event)
114-
statuses.append(workload_.get_status(event))
135+
statuses.append(workload_.status)
136+
if self._upgrade:
137+
statuses.append(self._upgrade.unit_juju_status)
115138
return self._prioritize_statuses(statuses)
116139

117-
def set_status(self, *, event) -> None:
140+
def set_status(self, *, event, app=True, unit=True) -> None:
118141
"""Set charm status."""
119-
if self._unit_lifecycle.authorized_leader:
142+
if app and self._unit_lifecycle.authorized_leader:
120143
self.app.status = self._determine_app_status(event=event)
121144
logger.debug(f"Set app status to {self.app.status}")
122-
self.unit.status = self._determine_unit_status(event=event)
123-
logger.debug(f"Set unit status to {self.unit.status}")
145+
if unit:
146+
self.unit.status = self._determine_unit_status(event=event)
147+
logger.debug(f"Set unit status to {self.unit.status}")
124148

125149
def wait_until_mysql_router_ready(self) -> None:
126150
"""Wait until a connection to MySQL Router is possible.
@@ -149,21 +173,38 @@ def wait_until_mysql_router_ready(self) -> None:
149173
# Handlers
150174
# =======================
151175

152-
def reconcile_database_relations(self, event=None) -> None:
153-
"""Handle database requires/provides events."""
176+
def reconcile(self, event=None) -> None: # noqa: C901
177+
"""Handle most events."""
178+
if not self._upgrade:
179+
logger.debug("Peer relation not available")
180+
return
181+
if self._upgrade.unit_state == "restarting":
182+
if not self._upgrade.is_compatible:
183+
self.unit.status = ops.BlockedStatus(
184+
"Upgrade incompatible. Rollback to previous revision with `juju refresh`"
185+
)
186+
self.set_status(event=event, unit=False)
187+
return
154188
workload_ = self.get_workload(event=event)
155189
logger.debug(
156190
"State of reconcile "
157191
f"{self._unit_lifecycle.authorized_leader=}, "
158192
f"{isinstance(workload_, workload.AuthenticatedWorkload)=}, "
159193
f"{workload_.container_ready=}, "
160-
f"{self._database_requires.is_relation_breaking(event)=}"
194+
f"{self._database_requires.is_relation_breaking(event)=}, "
195+
f"{self._upgrade.in_progress=}"
161196
)
162197
if self._unit_lifecycle.authorized_leader:
163198
if self._database_requires.is_relation_breaking(event):
199+
if self._upgrade.in_progress:
200+
logger.warning(
201+
"Modifying relations during an upgrade is not supported. The charm may be in a broken, unrecoverable state. Re-deploy the charm"
202+
)
164203
self._database_provides.delete_all_databags()
165204
elif (
166-
isinstance(workload_, workload.AuthenticatedWorkload) and workload_.container_ready
205+
not self._upgrade.in_progress
206+
and isinstance(workload_, workload.AuthenticatedWorkload)
207+
and workload_.container_ready
167208
):
168209
self._database_provides.reconcile_users(
169210
event=event,
@@ -175,4 +216,23 @@ def reconcile_database_relations(self, event=None) -> None:
175216
workload_.enable(tls=self._tls_certificate_saved, unit_name=self.unit.name)
176217
elif workload_.container_ready:
177218
workload_.disable()
219+
if not workload_.status:
220+
self._upgrade.unit_state = "healthy"
221+
if self._unit_lifecycle.authorized_leader:
222+
self._upgrade.reconcile_partition()
223+
if not self._upgrade.in_progress:
224+
self._upgrade.set_versions_in_app_databag()
178225
self.set_status(event=event)
226+
227+
def _on_resume_upgrade_action(self, event: ops.ActionEvent) -> None:
228+
if not self._unit_lifecycle.authorized_leader:
229+
message = f"Must run action on leader unit. (e.g. `juju run {self.app.name}/leader {upgrade.RESUME_ACTION_NAME}`)"
230+
logger.debug(f"Resume upgrade event failed: {message}")
231+
event.fail(message)
232+
return
233+
if not self._upgrade or not self._upgrade.in_progress:
234+
message = "No upgrade in progress"
235+
logger.debug(f"Resume upgrade event failed: {message}")
236+
event.fail(message)
237+
return
238+
self._upgrade.reconcile_partition(action_event=event)

0 commit comments

Comments
 (0)