134134 SECRET_KEY_OVERRIDES ,
135135 SPI_MODULE ,
136136 SYSTEM_USERS ,
137+ TEMP_STORAGE_PATH ,
137138 TLS_CA_BUNDLE_FILE ,
138139 TLS_CA_FILE ,
139140 TLS_CERT_FILE ,
@@ -249,7 +250,8 @@ def __init__(self, *args):
249250
250251 self ._certs_path = "/usr/local/share/ca-certificates"
251252 self ._storage_path = str (self .meta .storages ["data" ].location )
252- self .pgdata_path = f"{ self ._storage_path } /pgdata"
253+ self ._actual_pgdata_path = f"{ self ._storage_path } /16/main"
254+ self .pgdata_path = "/var/lib/postgresql/16/main"
253255
254256 self .framework .observe (self .on .upgrade_charm , self ._on_upgrade_charm )
255257 self .postgresql_client_relation = PostgreSQLProvider (self )
@@ -1109,17 +1111,108 @@ def fix_leader_annotation(self) -> bool:
11091111 raise e
11101112 return True
11111113
def _replica_can_start(self) -> bool:
    """Tell whether this replica may start Patroni now.

    Two conditions are checked, in order:

    1. ``self.is_cluster_initialised`` must be truthy.
    2. ``self._endpoint`` must be present in ``self._endpoints``.

    The second condition is only evaluated when the first one holds.

    Returns:
        True when both conditions hold; False otherwise, after emitting a
        debug log line naming the condition that failed.
    """
    # Pick the message for the first failing precondition; if none fails
    # the replica is good to go and nothing is logged.
    if not self.is_cluster_initialised:
        not_ready_message = "Replica not ready: cluster not initialized"
    elif self._endpoint not in self._endpoints:
        not_ready_message = "Replica not ready: endpoint not yet in members list"
    else:
        return True

    logger.debug(not_ready_message)
    return False

1128+
11121129 def _create_pgdata (self , container : Container ):
1113- """Create the PostgreSQL data directory."""
1114- if not container .exists (self .pgdata_path ):
1130+ """Create the PostgreSQL data directories."""
1131+ logs_path = str (self .meta .storages ["logs" ].location )
1132+ waldir_path = f"{ logs_path } /16/main/pg_wal"
1133+ temp_path = str (self .meta .storages ["temp" ].location )
1134+ temp_tablespace_path = f"{ temp_path } /16/main/pgsql_tmp"
1135+
1136+ # Clear stale storage directories when a replica joins an initialized cluster.
1137+ # This is needed because PersistentVolumes may retain data from previous pods,
1138+ # and pg_basebackup requires empty --waldir and --tablespace directories.
1139+ # Clear on every call until pgdata is populated (PG_VERSION exists), since
1140+ # pg_basebackup can fail and leave stale files that prevent retries.
1141+ pgdata_populated = container .exists (f"{ self ._actual_pgdata_path } /PG_VERSION" )
1142+ if not self .unit .is_leader () and self .is_cluster_initialised and not pgdata_populated :
1143+ for path in [waldir_path , temp_tablespace_path ]:
1144+ if container .exists (path ):
1145+ try :
1146+ container .exec (["find" , path , "-mindepth" , "1" , "-delete" ]).wait_output ()
1147+ logger .info (
1148+ f"Cleared stale content from { path } for replica initialization"
1149+ )
1150+ except ExecError as e :
1151+ if "No such file or directory" not in str (e .stderr ):
1152+ logger .warning (f"Failed to clear { path } : { e } " )
1153+
1154+ # Create the pgdata directory on the storage mount (e.g., /var/lib/pg/data/16/main)
1155+ if not container .exists (self ._actual_pgdata_path ):
1156+ container .make_dir (
1157+ self ._actual_pgdata_path ,
1158+ permissions = 0o700 ,
1159+ user = WORKLOAD_OS_USER ,
1160+ group = WORKLOAD_OS_GROUP ,
1161+ make_parents = True ,
1162+ )
1163+ # Create the WAL directory (e.g., /var/lib/pg/logs/16/main/pg_wal)
1164+ if not container .exists (waldir_path ):
11151165 container .make_dir (
1116- self .pgdata_path , permissions = 0o700 , user = WORKLOAD_OS_USER , group = WORKLOAD_OS_GROUP
1166+ waldir_path ,
1167+ permissions = 0o700 ,
1168+ user = WORKLOAD_OS_USER ,
1169+ group = WORKLOAD_OS_GROUP ,
1170+ make_parents = True ,
11171171 )
1172+ # Create the temp tablespace directory (e.g., /var/lib/pg/temp/16/main/pgsql_tmp)
1173+ if not container .exists (temp_tablespace_path ):
1174+ container .make_dir (
1175+ temp_tablespace_path ,
1176+ permissions = 0o700 ,
1177+ user = WORKLOAD_OS_USER ,
1178+ group = WORKLOAD_OS_GROUP ,
1179+ make_parents = True ,
1180+ )
1181+ # Create a symlink from the default PostgreSQL data directory to our data directory
1182+ # (e.g., /var/lib/postgresql/16/main -> /var/lib/pg/data/16/main)
1183+ # Patroni and other tools will use the symlink path (self.pgdata_path)
1184+ # Note: This symlink is on ephemeral storage and may not persist across container restarts.
1185+ # It gets recreated on each pebble-ready event.
1186+ # The OCI image ships /var/lib/postgresql/16/main as a real directory, so we must
1187+ # remove it first if it exists as a non-symlink (e.g., on replicas).
1188+ container .make_dir (
1189+ "/var/lib/postgresql/16" ,
1190+ user = WORKLOAD_OS_USER ,
1191+ group = WORKLOAD_OS_GROUP ,
1192+ make_parents = True ,
1193+ )
1194+ container .exec ([
1195+ "bash" ,
1196+ "-c" ,
1197+ f"[ -L { self .pgdata_path } ] || rm -rf { self .pgdata_path } " ,
1198+ ]).wait ()
1199+ container .exec ([
1200+ "ln" ,
1201+ "-sfn" ,
1202+ self ._actual_pgdata_path ,
1203+ self .pgdata_path ,
1204+ ]).wait ()
1205+ container .exec ([
1206+ "chown" ,
1207+ "-h" ,
1208+ f"{ WORKLOAD_OS_USER } :{ WORKLOAD_OS_GROUP } " ,
1209+ self .pgdata_path ,
1210+ ]).wait ()
11181211 # Also, fix the permissions from the parent directory.
11191212 container .exec ([
11201213 "chown" ,
11211214 f"{ WORKLOAD_OS_USER } :{ WORKLOAD_OS_GROUP } " ,
1122- "/var/lib/postgresql/ archive" ,
1215+ str ( self . meta . storages [ " archive"]. location ) ,
11231216 ]).wait ()
11241217 container .exec ([
11251218 "chown" ,
@@ -1129,12 +1222,12 @@ def _create_pgdata(self, container: Container):
11291222 container .exec ([
11301223 "chown" ,
11311224 f"{ WORKLOAD_OS_USER } :{ WORKLOAD_OS_GROUP } " ,
1132- "/var/lib/postgresql/logs" ,
1225+ logs_path ,
11331226 ]).wait ()
11341227 container .exec ([
11351228 "chown" ,
11361229 f"{ WORKLOAD_OS_USER } :{ WORKLOAD_OS_GROUP } " ,
1137- "/var/lib/postgresql/temp" ,
1230+ temp_path ,
11381231 ]).wait ()
11391232
11401233 def _on_start (self , _ ) -> None :
@@ -1170,15 +1263,14 @@ def _on_postgresql_pebble_ready(self, event: WorkloadEvent) -> None:
11701263 # where the volume is mounted with more restrictive permissions.
11711264 self ._create_pgdata (container )
11721265
1173- # Defer the initialization of the workload in the replicas
1174- # if the cluster hasn't been bootstrap on the primary yet.
1175- # Otherwise, each unit will create a different cluster and
1176- # any update in the members list on the units won't have effect
1177- # on fixing that.
1178- if not self .unit .is_leader () and not self .is_cluster_initialised :
1179- logger .debug (
1180- "Deferring on_postgresql_pebble_ready: Not leader and cluster not initialized"
1181- )
1266+ # Defer the initialization of the workload in the replicas if the cluster
1267+ # hasn't been bootstrapped on the primary yet, or the leader hasn't added
1268+ # this unit's endpoint to the cluster members list yet. The endpoint
1269+ # controls pg_hba replication entries on the primary — without it,
1270+ # pg_basebackup is rejected, triggering retries and remove_data_directory()
1271+ # calls that can race with _create_pgdata() and break the pg_wal symlink
1272+ # created by --waldir.
1273+ if not self .unit .is_leader () and not self ._replica_can_start ():
11821274 event .defer ()
11831275 return
11841276
@@ -1337,7 +1429,7 @@ def _setup_users(self) -> None:
13371429 extra_user_roles = ["pg_monitor" ],
13381430 )
13391431
1340- self .postgresql .set_up_database (temp_location = "/var/lib/postgresql/temp " )
1432+ self .postgresql .set_up_database (temp_location = f" { TEMP_STORAGE_PATH } /16/main/pgsql_tmp " )
13411433
13421434 access_groups = self .postgresql .list_access_groups ()
13431435 if access_groups != set (ACCESS_GROUPS ):
@@ -1587,6 +1679,7 @@ def _fix_pod(self) -> None:
15871679 # Recreate k8s resources and add labels required for replication
15881680 # when the pod loses them (like when it's deleted).
15891681 self .push_tls_files_to_workload ()
1682+
15901683 if self .refresh is not None and not self .refresh .in_progress :
15911684 try :
15921685 self ._create_services ()
@@ -1733,9 +1826,9 @@ def _on_update_status_early_exit_checks(self, container) -> bool:
17331826 def _check_pgdata_storage_size (self ) -> None :
17341827 """Asserts that pgdata volume has at least 10% free space and blocks charm if not."""
17351828 try :
1736- total_size , _ , free_size = shutil .disk_usage (self .pgdata_path )
1829+ total_size , _ , free_size = shutil .disk_usage (self ._actual_pgdata_path )
17371830 except FileNotFoundError :
1738- logger .error ("pgdata folder not found in %s" , self .pgdata_path )
1831+ logger .error ("pgdata folder not found in %s" , self ._actual_pgdata_path )
17391832 return
17401833
17411834 logger .debug (
@@ -1870,6 +1963,7 @@ def _patroni(self):
18701963 self .primary_endpoint ,
18711964 self ._namespace ,
18721965 self ._storage_path ,
1966+ self .pgdata_path ,
18731967 self .get_secret (APP_SCOPE , USER_PASSWORD_KEY ),
18741968 self .get_secret (APP_SCOPE , REPLICATION_PASSWORD_KEY ),
18751969 self .get_secret (APP_SCOPE , REWIND_PASSWORD_KEY ),
@@ -2582,7 +2676,10 @@ def update_config(self, is_creating_backup: bool = False) -> bool:
25822676 logger .warning ("Early exit update_config: Unable to patch Patroni API" )
25832677 return False
25842678
2585- self ._patroni .ensure_slots_controller_by_patroni (replication_slots )
2679+ if not self ._patroni .ensure_slots_controller_by_patroni (replication_slots ):
2680+ logger .warning (
2681+ "Failed to sync replication slots with Patroni — will retry on next config update"
2682+ )
25862683
25872684 self ._handle_postgresql_restart_need (
25882685 self .unit_peer_data .get ("config_hash" ) != self .generate_config_hash
0 commit comments