Skip to content

Commit 852fdf6

Browse files
armrumnencia
andauthored
fix(instance): improve error handling for RefreshSecrets (cloudnative-pg#7260)
Enhance error handling for the `RefreshSecrets` function across multiple components. Errors are now properly propagated and logged, ensuring that issues during secret refresh operations are surfaced and handled appropriately. Signed-off-by: Armando Ruocco <armando.ruocco@enterprisedb.com> Signed-off-by: Marco Nenciarini <marco.nenciarini@enterprisedb.com> Co-authored-by: Marco Nenciarini <marco.nenciarini@enterprisedb.com>
1 parent 3bc7061 commit 852fdf6

File tree

3 files changed

+41
-58
lines changed

3 files changed

+41
-58
lines changed

internal/cmd/manager/instance/join/cmd.go

Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -123,16 +123,10 @@ func joinSubCommand(ctx context.Context, instance *postgres.Instance, info postg
123123
return err
124124
}
125125

126-
// Since we're directly using the reconciler here, we cannot
127-
// tell if the secrets were correctly downloaded or not.
128-
// If they were the following "pg_basebackup" command will work, if
129-
// they don't "pg_basebackup" with fail, complaining that the
130-
// cryptographic material is not available.
131-
// So it doesn't make a real difference.
132-
//
133-
// Besides this, we should improve this situation to have
134-
// a real error handling.
135-
reconciler.RefreshSecrets(ctx, &cluster)
126+
if _, err := reconciler.RefreshSecrets(ctx, &cluster); err != nil {
127+
contextLogger.Error(err, "Error while refreshing secrets")
128+
return err
129+
}
136130

137131
// Run "pg_basebackup" to download the data directory from the primary
138132
if err := info.Join(ctx, &cluster); err != nil {

internal/cmd/manager/instance/upgrade/execute/cmd.go

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,7 @@ func NewCmd() *cobra.Command {
107107
return cmd
108108
}
109109

110+
// nolint:gocognit
110111
func upgradeSubCommand(
111112
ctx context.Context,
112113
instance *postgres.Instance,
@@ -139,12 +140,9 @@ func upgradeSubCommand(
139140
return err
140141
}
141142

142-
// Since we're directly using the reconciler here, we cannot
143-
// tell if the secrets were correctly downloaded or not.
144-
// If they were the following "pg_upgrade" command will work, if
145-
// they don't "pg_upgrade" with fail, complaining that the
146-
// cryptographic material is not available.
147-
reconciler.RefreshSecrets(ctx, &cluster)
143+
if _, err := reconciler.RefreshSecrets(ctx, &cluster); err != nil {
144+
return fmt.Errorf("error while downloading secrets: %w", err)
145+
}
148146

149147
if err := reconciler.ReconcileWalStorage(ctx); err != nil {
150148
return fmt.Errorf("error while reconciling the WAL storage: %w", err)

internal/management/controller/instance_controller.go

Lines changed: 33 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -159,7 +159,10 @@ func (r *InstanceReconciler) Reconcile(
159159

160160
// Reconcile secrets and cryptographic material
161161
// This doesn't need the PG connection, but it needs to reload it in case of changes
162-
reloadNeeded := r.RefreshSecrets(ctx, cluster)
162+
reloadNeeded, err := r.RefreshSecrets(ctx, cluster)
163+
if err != nil {
164+
return reconcile.Result{}, fmt.Errorf("while refreshing secrets: %w", err)
165+
}
163166

164167
reloadConfigNeeded, err := r.refreshConfigurationFiles(ctx, cluster)
165168
if err != nil {
@@ -899,65 +902,53 @@ func (r *InstanceReconciler) reconcileMonitoringQueries(
899902
// It returns a boolean flag telling if something changed. Usually
900903
// the invoker will check that flag and reload the PostgreSQL
901904
// instance it is up.
902-
//
903-
// This function manages its own errors by logging them, so the
904-
// user cannot easily tell if the operation has been done completely.
905-
// The rationale behind this is:
906-
//
907-
// 1. when invoked at the startup of the instance manager, PostgreSQL
908-
// is not up. If this raise an error, then PostgreSQL won't
909-
// be able to start correctly (TLS certs are missing, i.e.),
910-
// making no difference between returning an error or not
911-
//
912-
// 2. when invoked inside the reconciliation loop, if the operation
913-
// raise an error, it's pointless to retry. The only way to recover
914-
// from such an error is wait for the CNPG operator to refresh the
915-
// resource version of the secrets to be used, and in that case a
916-
// reconciliation loop will be started again.
917905
func (r *InstanceReconciler) RefreshSecrets(
918906
ctx context.Context,
919907
cluster *apiv1.Cluster,
920-
) bool {
908+
) (bool, error) {
909+
type executor func(context.Context, *apiv1.Cluster) (bool, error)
910+
921911
contextLogger := log.FromContext(ctx)
922912

923-
changed := false
913+
var changed bool
914+
915+
secretRefresher := func(cb executor) error {
916+
localChanged, err := cb(ctx, cluster)
917+
if err == nil {
918+
changed = changed || localChanged
919+
return nil
920+
}
921+
922+
if !apierrors.IsNotFound(err) {
923+
return err
924+
}
924925

925-
serverSecretChanged, err := r.refreshServerCertificateFiles(ctx, cluster)
926-
if err == nil {
927-
changed = changed || serverSecretChanged
928-
} else if !apierrors.IsNotFound(err) {
926+
return nil
927+
}
928+
929+
if err := secretRefresher(r.refreshServerCertificateFiles); err != nil {
929930
contextLogger.Error(err, "Error while getting server secret")
931+
return changed, err
930932
}
931933

932-
replicationSecretChanged, err := r.refreshReplicationUserCertificate(ctx, cluster)
933-
if err == nil {
934-
changed = changed || replicationSecretChanged
935-
} else if !apierrors.IsNotFound(err) {
934+
if err := secretRefresher(r.refreshReplicationUserCertificate); err != nil {
936935
contextLogger.Error(err, "Error while getting streaming replication secret")
936+
return changed, err
937937
}
938-
939-
clientCaSecretChanged, err := r.refreshClientCA(ctx, cluster)
940-
if err == nil {
941-
changed = changed || clientCaSecretChanged
942-
} else if !apierrors.IsNotFound(err) {
938+
if err := secretRefresher(r.refreshClientCA); err != nil {
943939
contextLogger.Error(err, "Error while getting cluster CA Client secret")
940+
return changed, err
944941
}
945-
946-
serverCaSecretChanged, err := r.refreshServerCA(ctx, cluster)
947-
if err == nil {
948-
changed = changed || serverCaSecretChanged
949-
} else if !apierrors.IsNotFound(err) {
942+
if err := secretRefresher(r.refreshServerCA); err != nil {
950943
contextLogger.Error(err, "Error while getting cluster CA Server secret")
944+
return changed, err
951945
}
952-
953-
barmanEndpointCaSecretChanged, err := r.refreshBarmanEndpointCA(ctx, cluster)
954-
if err == nil {
955-
changed = changed || barmanEndpointCaSecretChanged
956-
} else if !apierrors.IsNotFound(err) {
946+
if err := secretRefresher(r.refreshBarmanEndpointCA); err != nil {
957947
contextLogger.Error(err, "Error while getting barman endpoint CA secret")
948+
return changed, err
958949
}
959950

960-
return changed
951+
return changed, nil
961952
}
962953

963954
// reconcileInstance sets PostgreSQL instance parameters to current values

0 commit comments

Comments
 (0)