@@ -395,84 +395,130 @@ func (c *Cluster) syncStatefulSet() error {
395
395
}
396
396
}
397
397
398
- // Apply special PostgreSQL parameters that can only be set via the Patroni API.
398
+ // apply PostgreSQL parameters that can only be set via the Patroni API.
399
399
// it is important to do it after the statefulset pods are there, but before the rolling update
400
400
// since those parameters require PostgreSQL restart.
401
401
pods , err = c .listPods ()
402
402
if err != nil {
403
- c .logger .Warnf ("could not get list of pods to apply special PostgreSQL parameters only to be set via Patroni API: %v" , err )
403
+ c .logger .Warnf ("could not get list of pods to apply PostgreSQL parameters only to be set via Patroni API: %v" , err )
404
404
}
405
405
406
+ requiredPgParameters := c .Spec .Parameters
407
+ // if streams are defined wal_level must be switched to logical
408
+ if len (c .Spec .Streams ) > 0 {
409
+ requiredPgParameters ["wal_level" ] = "logical"
410
+ }
411
+
412
+ // sync Patroni config
413
+ if configPatched , restartPrimaryFirst , restartWait , err = c .syncPatroniConfig (pods , c .Spec .Patroni , requiredPgParameters ); err != nil {
414
+ c .logger .Warningf ("Patroni config updated? %v - errors during config sync: %v" , configPatched , err )
415
+ isSafeToRecreatePods = false
416
+ }
417
+
418
+ // restart Postgres where it is still pending
419
+ if err = c .restartInstances (pods , restartWait , restartPrimaryFirst ); err != nil {
420
+ c .logger .Errorf ("errors while restarting Postgres in pods via Patroni API: %v" , err )
421
+ isSafeToRecreatePods = false
422
+ }
423
+
424
+ // if we get here we also need to re-create the pods (either leftovers from the old
425
+ // statefulset or those that got their configuration from the outdated statefulset)
426
+ if len (podsToRecreate ) > 0 {
427
+ if isSafeToRecreatePods {
428
+ c .logger .Debugln ("performing rolling update" )
429
+ c .eventRecorder .Event (c .GetReference (), v1 .EventTypeNormal , "Update" , "Performing rolling update" )
430
+ if err := c .recreatePods (podsToRecreate , switchoverCandidates ); err != nil {
431
+ return fmt .Errorf ("could not recreate pods: %v" , err )
432
+ }
433
+ c .eventRecorder .Event (c .GetReference (), v1 .EventTypeNormal , "Update" , "Rolling update done - pods have been recreated" )
434
+ } else {
435
+ c .logger .Warningf ("postpone pod recreation until next sync because of errors during config sync" )
436
+ }
437
+ }
438
+
439
+ return nil
440
+ }
441
+
442
+ func (c * Cluster ) syncPatroniConfig (pods []v1.Pod , requiredPatroniConfig acidv1.Patroni , requiredPgParameters map [string ]string ) (bool , bool , uint32 , error ) {
443
+ var (
444
+ effectivePatroniConfig acidv1.Patroni
445
+ effectivePgParameters map [string ]string
446
+ loopWait uint32
447
+ configPatched bool
448
+ restartPrimaryFirst bool
449
+ err error
450
+ )
451
+
452
+ errors := make ([]string , 0 )
453
+
406
454
// get Postgres config, compare with manifest and update via Patroni PATCH endpoint if it differs
407
- // Patroni's config endpoint is just a "proxy" to DCS. It is enough to patch it only once and it doesn't matter which pod is used
408
455
for i , pod := range pods {
409
- patroniConfig , pgParameters , err := c .getPatroniConfig (& pod )
456
+ podName := util .NameFromMeta (pods [i ].ObjectMeta )
457
+ effectivePatroniConfig , effectivePgParameters , err = c .patroni .GetConfig (& pod )
410
458
if err != nil {
411
- c .logger .Warningf ("%v" , err )
412
- isSafeToRecreatePods = false
459
+ errors = append (errors , fmt .Sprintf ("could not get Postgres config from pod %s: %v" , podName , err ))
413
460
continue
414
461
}
415
- restartWait = patroniConfig .LoopWait
462
+ loopWait = effectivePatroniConfig .LoopWait
416
463
417
464
// empty config probably means cluster is not fully initialized yet, e.g. restoring from backup
418
- // do not attempt a restart
419
- if ! reflect . DeepEqual ( patroniConfig , acidv1. Patroni {}) || len ( pgParameters ) > 0 {
420
- // compare config returned from Patroni with what is specified in the manifest
421
- configPatched , restartPrimaryFirst , err = c .checkAndSetGlobalPostgreSQLConfiguration (& pod , patroniConfig , c . Spec . Patroni , pgParameters , c . Spec . Parameters )
465
+ if reflect . DeepEqual ( effectivePatroniConfig , acidv1. Patroni {}) || len ( effectivePgParameters ) == 0 {
466
+ errors = append ( errors , fmt . Sprintf ( "empty Patroni config on pod %s - skipping config patch" , podName ))
467
+ } else {
468
+ configPatched , restartPrimaryFirst , err = c .checkAndSetGlobalPostgreSQLConfiguration (& pod , effectivePatroniConfig , requiredPatroniConfig , effectivePgParameters , requiredPgParameters )
422
469
if err != nil {
423
- c . logger . Warningf ("could not set PostgreSQL configuration options for pod %s: %v" , pods [ i ]. Name , err )
470
+ errors = append ( errors , fmt . Sprintf ("could not set PostgreSQL configuration options for pod %s: %v" , podName , err ) )
424
471
continue
425
472
}
426
473
427
474
// it could take up to LoopWait to apply the config
428
475
if configPatched {
429
- time .Sleep (time .Duration (restartWait )* time .Second + time .Second * 2 )
476
+ time .Sleep (time .Duration (loopWait )* time .Second + time .Second * 2 )
477
+ // Patroni's config endpoint is just a "proxy" to DCS.
478
+ // It is enough to patch it only once and it doesn't matter which pod is used
430
479
break
431
480
}
432
481
}
433
482
}
434
483
435
- // restart instances if it is still pending
484
+ if len (errors ) > 0 {
485
+ err = fmt .Errorf ("%v" , strings .Join (errors , `', '` ))
486
+ }
487
+
488
+ return configPatched , restartPrimaryFirst , loopWait , err
489
+ }
490
+
491
+ func (c * Cluster ) restartInstances (pods []v1.Pod , restartWait uint32 , restartPrimaryFirst bool ) (err error ) {
492
+ errors := make ([]string , 0 )
436
493
remainingPods := make ([]* v1.Pod , 0 )
494
+
437
495
skipRole := Master
438
496
if restartPrimaryFirst {
439
497
skipRole = Replica
440
498
}
499
+
441
500
for i , pod := range pods {
442
501
role := PostgresRole (pod .Labels [c .OpConfig .PodRoleLabel ])
443
502
if role == skipRole {
444
503
remainingPods = append (remainingPods , & pods [i ])
445
504
continue
446
505
}
447
506
if err = c .restartInstance (& pod , restartWait ); err != nil {
448
- c .logger .Errorf ("%v" , err )
449
- isSafeToRecreatePods = false
507
+ errors = append (errors , fmt .Sprintf ("%v" , err ))
450
508
}
451
509
}
452
510
453
511
// in most cases only the master should be left to restart
454
512
if len (remainingPods ) > 0 {
455
513
for _ , remainingPod := range remainingPods {
456
514
if err = c .restartInstance (remainingPod , restartWait ); err != nil {
457
- c .logger .Errorf ("%v" , err )
458
- isSafeToRecreatePods = false
515
+ errors = append (errors , fmt .Sprintf ("%v" , err ))
459
516
}
460
517
}
461
518
}
462
519
463
- // if we get here we also need to re-create the pods (either leftovers from the old
464
- // statefulset or those that got their configuration from the outdated statefulset)
465
- if len (podsToRecreate ) > 0 {
466
- if isSafeToRecreatePods {
467
- c .logger .Debugln ("performing rolling update" )
468
- c .eventRecorder .Event (c .GetReference (), v1 .EventTypeNormal , "Update" , "Performing rolling update" )
469
- if err := c .recreatePods (podsToRecreate , switchoverCandidates ); err != nil {
470
- return fmt .Errorf ("could not recreate pods: %v" , err )
471
- }
472
- c .eventRecorder .Event (c .GetReference (), v1 .EventTypeNormal , "Update" , "Rolling update done - pods have been recreated" )
473
- } else {
474
- c .logger .Warningf ("postpone pod recreation until next sync" )
475
- }
520
+ if len (errors ) > 0 {
521
+ return fmt .Errorf ("%v" , strings .Join (errors , `', '` ))
476
522
}
477
523
478
524
return nil
0 commit comments