Skip to content

Commit 8b9d456

Browse files
Enable Sync Mode (#134)
https://warthogs.atlassian.net/browse/DPE-1456 --------- Co-authored-by: Dragomir Penev <[email protected]>
1 parent b4874cc commit 8b9d456

File tree

14 files changed

+452
-328
lines changed

14 files changed

+452
-328
lines changed

src/charm.py

Lines changed: 52 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
PostgreSQLUpdateUserPasswordError,
1414
)
1515
from charms.postgresql_k8s.v0.postgresql_tls import PostgreSQLTLS
16-
from charms.rolling_ops.v0.rollingops import RollingOpsManager
16+
from charms.rolling_ops.v0.rollingops import RollingOpsManager, RunWithLock
1717
from lightkube import ApiError, Client, codecs
1818
from lightkube.models.core_v1 import ServicePort
1919
from lightkube.resources.core_v1 import Endpoints, Pod, Service
@@ -76,7 +76,6 @@ def __init__(self, *args):
7676
self._context = {"namespace": self._namespace, "app_name": self._name}
7777
self.cluster_name = f"patroni-{self._name}"
7878

79-
self.framework.observe(self.on.install, self._on_install)
8079
self.framework.observe(self.on.config_changed, self._on_config_changed)
8180
self.framework.observe(self.on.leader_elected, self._on_leader_elected)
8281
self.framework.observe(self.on[PEER].relation_changed, self._on_peer_relation_changed)
@@ -211,10 +210,6 @@ def _on_peer_relation_departed(self, event: RelationDepartedEvent) -> None:
211210
self.postgresql_client_relation.update_read_only_endpoint()
212211
self._remove_from_endpoints(endpoints_to_remove)
213212

214-
# Update the replication configuration.
215-
self._patroni.render_postgresql_conf_file()
216-
self._patroni.reload_patroni_configuration()
217-
218213
def _on_peer_relation_changed(self, event: RelationChangedEvent) -> None:
219214
"""Reconfigure cluster members."""
220215
# The cluster must be initialized first in the leader unit
@@ -262,11 +257,6 @@ def _on_peer_relation_changed(self, event: RelationChangedEvent) -> None:
262257

263258
self.unit.status = ActiveStatus()
264259

265-
def _on_install(self, _) -> None:
266-
"""Event handler for InstallEvent."""
267-
# Creates custom postgresql.conf file.
268-
self._patroni.render_postgresql_conf_file()
269-
270260
def _on_config_changed(self, _) -> None:
271261
"""Handle the config-changed event."""
272262
# TODO: placeholder method to implement logic specific to configuration change.
@@ -384,13 +374,6 @@ def _on_leader_elected(self, event: LeaderElectedEvent) -> None:
384374

385375
self._add_members(event)
386376

387-
# Update the replication configuration.
388-
self._patroni.render_postgresql_conf_file()
389-
try:
390-
self._patroni.reload_patroni_configuration()
391-
except RetryError:
392-
pass # This error can happen in the first leader election, as Patroni is not running yet.
393-
394377
def _create_pgdata(self, container: Container):
395378
"""Create the PostgreSQL data directory."""
396379
path = f"{self._storage_path}/pgdata"
@@ -410,9 +393,6 @@ def _on_postgresql_pebble_ready(self, event: WorkloadEvent) -> None:
410393
# where the volume is mounted with more restrictive permissions.
411394
self._create_pgdata(container)
412395

413-
# Create a new config layer.
414-
new_layer = self._postgresql_layer()
415-
416396
self.unit.set_workload_version(self._patroni.rock_postgresql_version)
417397

418398
# Defer the initialization of the workload in the replicas
@@ -436,18 +416,8 @@ def _on_postgresql_pebble_ready(self, event: WorkloadEvent) -> None:
436416
event.defer()
437417
return
438418

439-
# Get the current layer.
440-
current_layer = container.get_plan()
441-
# Check if there are any changes to layer services.
442-
if current_layer.services != new_layer.services:
443-
# Changes were made, add the new layer.
444-
container.add_layer(self._postgresql_service, new_layer, combine=True)
445-
logging.info("Added updated layer 'postgresql' to Pebble plan")
446-
# TODO: move this file generation to on config changed hook
447-
# when adding configs to this charm.
448-
# Restart it and report a new status to Juju.
449-
container.restart(self._postgresql_service)
450-
logging.info("Restarted postgresql service")
419+
# Start the database service.
420+
self._update_pebble_layers()
451421

452422
# Ensure the member is up and running before marking the cluster as initialised.
453423
if not self._patroni.member_started:
@@ -840,6 +810,15 @@ def _postgresql_layer(self) -> Layer:
840810
"group": WORKLOAD_OS_GROUP,
841811
},
842812
},
813+
"checks": {
814+
self._postgresql_service: {
815+
"override": "replace",
816+
"level": "ready",
817+
"http": {
818+
"url": f"{self._patroni._patroni_url}/health",
819+
},
820+
}
821+
},
843822
}
844823
return Layer(layer_config)
845824

@@ -878,6 +857,15 @@ def push_tls_files_to_workload(self, container: Container = None) -> None:
878857
user=WORKLOAD_OS_USER,
879858
group=WORKLOAD_OS_GROUP,
880859
)
860+
container.push(
861+
"/usr/local/share/ca-certificates/ca.crt",
862+
ca,
863+
make_dirs=True,
864+
permissions=0o400,
865+
user=WORKLOAD_OS_USER,
866+
group=WORKLOAD_OS_GROUP,
867+
)
868+
container.exec(["update-ca-certificates"]).wait()
881869
if cert is not None:
882870
container.push(
883871
f"{self._storage_path}/{TLS_CERT_FILE}",
@@ -890,8 +878,13 @@ def push_tls_files_to_workload(self, container: Container = None) -> None:
890878

891879
self.update_config()
892880

893-
def _restart(self, _) -> None:
881+
def _restart(self, event: RunWithLock) -> None:
894882
"""Restart PostgreSQL."""
883+
if not self._patroni.are_all_members_ready():
884+
logger.debug("Early exit _restart: not all members ready yet")
885+
event.defer()
886+
return
887+
895888
try:
896889
self._patroni.restart_postgresql()
897890
except RetryError:
@@ -900,6 +893,9 @@ def _restart(self, _) -> None:
900893
self.unit.status = BlockedStatus(error_message)
901894
return
902895

896+
# Update health check URL.
897+
self._update_pebble_layers()
898+
903899
# Start or stop the pgBackRest TLS server service when TLS certificate change.
904900
self.backup.start_stop_pgbackrest_service()
905901

@@ -915,7 +911,6 @@ def update_config(self) -> None:
915911
backup_id=self.app_peer_data.get("restoring-backup"),
916912
stanza=self.app_peer_data.get("stanza"),
917913
)
918-
self._patroni.render_postgresql_conf_file()
919914
if not self._patroni.member_started:
920915
# If Patroni/PostgreSQL has not started yet and TLS relations were initialised,
921916
# then mark TLS as enabled. This commonly happens when the charm is deployed
@@ -934,6 +929,28 @@ def update_config(self) -> None:
934929
if restart_postgresql:
935930
self.on[self.restart_manager.name].acquire_lock.emit()
936931

932+
def _update_pebble_layers(self) -> None:
    """Bring the Pebble plan in line with the layer built by ``_postgresql_layer``.

    The plan currently held by the ``postgresql`` container is compared with a
    freshly built layer. When either the services or the checks differ, the new
    layer is merged into the plan exactly once. The service is restarted only
    when the services section changed, so a check-only change (for example a new
    health check URL) does not restart the service.
    """
    container = self.unit.get_container("postgresql")

    # Both comparisons below are made against the plan as Pebble holds it now.
    current_plan = container.get_plan()
    new_layer = self._postgresql_layer()

    services_changed = current_plan.services != new_layer.services
    checks_changed = current_plan.checks != new_layer.checks
    if not (services_changed or checks_changed):
        return

    # The layer carries both services and checks, so a single merge covers
    # either kind of change; there is no need to add the same layer twice.
    container.add_layer(self._postgresql_service, new_layer, combine=True)

    if services_changed:
        logger.info("Added updated layer 'postgresql' to Pebble plan")
        container.restart(self._postgresql_service)
        logger.info("Restarted postgresql service")
    if checks_changed:
        logger.info("Updated health checks")
937954
def _unit_name_to_pod_name(self, unit_name: str) -> str:
938955
"""Converts unit name to pod name.
939956

src/patroni.py

Lines changed: 2 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -224,7 +224,7 @@ def render_patroni_yml_file(
224224
stanza: name of the stanza created by pgBackRest.
225225
backup_id: id of the backup that is being restored.
226226
"""
227-
# Open the template postgresql.conf file.
227+
# Open the template patroni.yml file.
228228
with open("templates/patroni.yml.j2", "r") as file:
229229
template = Template(file.read())
230230
# Render the template file with the correct values.
@@ -244,24 +244,11 @@ def render_patroni_yml_file(
244244
restoring_backup=backup_id is not None,
245245
backup_id=backup_id,
246246
stanza=stanza,
247+
minority_count=self._members_count // 2,
247248
version=self.rock_postgresql_version.split(".")[0],
248249
)
249250
self._render_file(f"{self._storage_path}/patroni.yml", rendered, 0o644)
250251

251-
def render_postgresql_conf_file(self) -> None:
252-
"""Render the PostgreSQL configuration file."""
253-
# Open the template postgresql.conf file.
254-
with open("templates/postgresql.conf.j2", "r") as file:
255-
template = Template(file.read())
256-
# Render the template file with the correct values.
257-
# TODO: add extra configurations here later.
258-
rendered = template.render(
259-
logging_collector="on",
260-
synchronous_commit="on" if self._members_count > 1 else "off",
261-
synchronous_standby_names="*",
262-
)
263-
self._render_file(f"{self._storage_path}/postgresql-k8s-operator.conf", rendered, 0o644)
264-
265252
@retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=2, max=10))
266253
def reload_patroni_configuration(self) -> None:
267254
"""Reloads the configuration after it was updated in the file."""

templates/patroni.yml.j2

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,24 @@
11
bootstrap:
22
dcs:
3+
synchronous_mode: true
4+
synchronous_node_count: {{ minority_count }}
35
postgresql:
46
use_pg_rewind: true
57
remove_data_directory_on_rewind_failure: true
68
remove_data_directory_on_diverged_timelines: true
79
bin_dir: /usr/lib/postgresql/{{ version }}/bin
810
parameters:
11+
synchronous_commit: on
12+
synchronous_standby_names: "*"
913
{%- if enable_pgbackrest %}
1014
archive_command: 'pgbackrest --stanza={{ stanza }} archive-push %p'
1115
{% else %}
1216
archive_command: /bin/true
1317
{%- endif %}
1418
archive_mode: {{ archive_mode }}
19+
log_filename: 'postgresql.log'
20+
log_directory: '/var/log/postgresql'
21+
logging_collector: 'on'
1522
password_encryption: md5
1623
wal_level: logical
1724
{%- if restoring_backup %}
@@ -50,7 +57,6 @@ ctl:
5057
pod_ip: '{{ endpoint }}'
5158
postgresql:
5259
connect_address: '{{ endpoint }}:5432'
53-
custom_conf: {{ storage_path }}/postgresql-k8s-operator.conf
5460
data_dir: {{ storage_path }}/pgdata
5561
bin_dir: /usr/lib/postgresql/{{ version }}/bin
5662
listen: 0.0.0.0:5432

templates/postgresql.conf.j2

Lines changed: 0 additions & 7 deletions
This file was deleted.

tests/integration/ha_tests/application-charm/metadata.yaml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,3 +11,7 @@ requires:
1111
database:
1212
interface: postgresql_client
1313
limit: 1
14+
15+
peers:
16+
application-peers:
17+
interface: application-peers

0 commit comments

Comments
 (0)