Skip to content

Commit acdb957

Browse files
fahedouch, fahed dorgaa, and FxKu
authored
fix switch over candidate retrieving (zalando#2760)
* fix switch over candidate retrieving Signed-off-by: fahed dorgaa <[email protected]> --------- Signed-off-by: fahed dorgaa <[email protected]> Co-authored-by: fahed dorgaa <[email protected]> Co-authored-by: Felix Kunde <[email protected]>
1 parent 8231797 commit acdb957

File tree

2 files changed

+17
-20
lines changed

2 files changed

+17
-20
lines changed

pkg/cluster/pod.go

Lines changed: 15 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -480,6 +480,9 @@ func (c *Cluster) getSwitchoverCandidate(master *v1.Pod) (spec.NamespacedName, e
480480
if PostgresRole(member.Role) == SyncStandby {
481481
syncCandidates = append(syncCandidates, member)
482482
}
483+
if PostgresRole(member.Role) != Leader && PostgresRole(member.Role) != StandbyLeader && slices.Contains([]string{"running", "streaming", "in archive recovery"}, member.State) {
484+
candidates = append(candidates, member)
485+
}
483486
}
484487

485488
// if synchronous mode is enabled and no SyncStandby was found
@@ -489,6 +492,12 @@ func (c *Cluster) getSwitchoverCandidate(master *v1.Pod) (spec.NamespacedName, e
489492
return false, nil
490493
}
491494

495+
// retry also in asynchronous mode when no replica candidate was found
496+
if !c.Spec.Patroni.SynchronousMode && len(candidates) == 0 {
497+
c.logger.Warnf("no replica candidate found - retrying fetching cluster members")
498+
return false, nil
499+
}
500+
492501
return true, nil
493502
},
494503
)
@@ -502,24 +511,12 @@ func (c *Cluster) getSwitchoverCandidate(master *v1.Pod) (spec.NamespacedName, e
502511
return syncCandidates[i].Lag < syncCandidates[j].Lag
503512
})
504513
return spec.NamespacedName{Namespace: master.Namespace, Name: syncCandidates[0].Name}, nil
505-
} else {
506-
// in asynchronous mode find running replicas
507-
for _, member := range members {
508-
if PostgresRole(member.Role) == Leader || PostgresRole(member.Role) == StandbyLeader {
509-
continue
510-
}
511-
512-
if slices.Contains([]string{"running", "streaming", "in archive recovery"}, member.State) {
513-
candidates = append(candidates, member)
514-
}
515-
}
516-
517-
if len(candidates) > 0 {
518-
sort.Slice(candidates, func(i, j int) bool {
519-
return candidates[i].Lag < candidates[j].Lag
520-
})
521-
return spec.NamespacedName{Namespace: master.Namespace, Name: candidates[0].Name}, nil
522-
}
514+
}
515+
if len(candidates) > 0 {
516+
sort.Slice(candidates, func(i, j int) bool {
517+
return candidates[i].Lag < candidates[j].Lag
518+
})
519+
return spec.NamespacedName{Namespace: master.Namespace, Name: candidates[0].Name}, nil
523520
}
524521

525522
return spec.NamespacedName{}, fmt.Errorf("no switchover candidate found")

pkg/cluster/pod_test.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ func TestGetSwitchoverCandidate(t *testing.T) {
6262
expectedError: nil,
6363
},
6464
{
65-
subtest: "choose first replica when lag is equal evrywhere",
65+
subtest: "choose first replica when lag is equal everywhere",
6666
clusterJson: `{"members": [{"name": "acid-test-cluster-0", "role": "leader", "state": "running", "api_url": "http://192.168.100.1:8008/patroni", "host": "192.168.100.1", "port": 5432, "timeline": 1}, {"name": "acid-test-cluster-1", "role": "replica", "state": "streaming", "api_url": "http://192.168.100.2:8008/patroni", "host": "192.168.100.2", "port": 5432, "timeline": 1, "lag": 5}, {"name": "acid-test-cluster-2", "role": "replica", "state": "running", "api_url": "http://192.168.100.3:8008/patroni", "host": "192.168.100.3", "port": 5432, "timeline": 1, "lag": 5}]}`,
6767
syncModeEnabled: false,
6868
expectedCandidate: spec.NamespacedName{Namespace: namespace, Name: "acid-test-cluster-1"},
@@ -73,7 +73,7 @@ func TestGetSwitchoverCandidate(t *testing.T) {
7373
clusterJson: `{"members": [{"name": "acid-test-cluster-0", "role": "leader", "state": "running", "api_url": "http://192.168.100.1:8008/patroni", "host": "192.168.100.1", "port": 5432, "timeline": 2}, {"name": "acid-test-cluster-1", "role": "replica", "state": "starting", "api_url": "http://192.168.100.2:8008/patroni", "host": "192.168.100.2", "port": 5432, "timeline": 2}]}`,
7474
syncModeEnabled: false,
7575
expectedCandidate: spec.NamespacedName{},
76-
expectedError: fmt.Errorf("no switchover candidate found"),
76+
expectedError: fmt.Errorf("failed to get Patroni cluster members: unexpected end of JSON input"),
7777
},
7878
{
7979
subtest: "replicas with different status",

0 commit comments

Comments
 (0)