Skip to content

Commit 447a079

Browse files
leonardoce and armru authored
feat: keep track of the PG system ID in the status (cloudnative-pg#7717)
This patch adds a field named `.status.SystemID` that keeps track of the system ID reported by the PostgreSQL instances. The new field is set only if the reported system ID is consistent across all the instances. If there is no reported system ID or if there are inconsistencies, the field is set to empty. A new condition keeps track of the consistency status. Closes: cloudnative-pg#7716 Signed-off-by: Leonardo Cecchi <leonardo.cecchi@enterprisedb.com> Signed-off-by: Armando Ruocco <armando.ruocco@enterprisedb.com> Co-authored-by: Armando Ruocco <armando.ruocco@enterprisedb.com>
1 parent c8628fc commit 447a079

File tree

6 files changed

+276
-0
lines changed

6 files changed

+276
-0
lines changed

.wordlist-en-custom.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -480,6 +480,7 @@ SynchronizeReplicasConfiguration
480480
SynchronousReplicaConfiguration
481481
SynchronousReplicaConfigurationMethod
482482
Synopsys
483+
SystemID
483484
TCP
484485
TLS
485486
TLSv
@@ -1331,6 +1332,7 @@ synchronizeReplicas
13311332
synchronizeReplicasCache
13321333
sys
13331334
syslog
1335+
systemID
13341336
systemd
13351337
sysv
13361338
tAc

api/v1/cluster_types.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -956,6 +956,10 @@ type ClusterStatus struct {
956956
// WAL file, and Time of latest checkpoint
957957
// +optional
958958
DemotionToken string `json:"demotionToken,omitempty"`
959+
960+
// SystemID is the latest detected PostgreSQL SystemID
961+
// +optional
962+
SystemID string `json:"systemID,omitempty"`
959963
}
960964

961965
// ImageInfo contains the information about a PostgreSQL image
@@ -996,6 +1000,9 @@ const (
9961000
ConditionBackup ClusterConditionType = "LastBackupSucceeded"
9971001
// ConditionClusterReady represents whether a cluster is Ready
9981002
ConditionClusterReady ClusterConditionType = "Ready"
1003+
// ConditionConsistentSystemID is true when all the instances of the
1004+
// cluster report the same System ID.
1005+
ConditionConsistentSystemID ClusterConditionType = "ConsistentSystemID"
9991006
)
10001007

10011008
// ConditionStatus defines conditions of resources

config/crd/bases/postgresql.cnpg.io_clusters.yaml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6376,6 +6376,9 @@ spec:
63766376
of switching a cluster to a replica cluster.
63776377
type: boolean
63786378
type: object
6379+
systemID:
6380+
description: SystemID is the latest detected PostgreSQL SystemID
6381+
type: string
63796382
tablespacesStatus:
63806383
description: TablespacesStatus reports the state of the declarative
63816384
tablespaces in the cluster

docs/src/cloudnative-pg.v1.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2289,6 +2289,13 @@ TimeLineID, Latest checkpoint's REDO location, Latest checkpoint's REDO
22892289
WAL file, and Time of latest checkpoint</p>
22902290
</td>
22912291
</tr>
2292+
<tr><td><code>systemID</code><br/>
2293+
<i>string</i>
2294+
</td>
2295+
<td>
2296+
<p>SystemID is the latest detected PostgreSQL SystemID</p>
2297+
</td>
2298+
</tr>
22922299
</tbody>
22932300
</table>
22942301

internal/controller/cluster_status.go

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,9 +28,11 @@ import (
2828

2929
"github.com/cloudnative-pg/machinery/pkg/log"
3030
pgTime "github.com/cloudnative-pg/machinery/pkg/postgres/time"
31+
"github.com/cloudnative-pg/machinery/pkg/stringset"
3132
batchv1 "k8s.io/api/batch/v1"
3233
corev1 "k8s.io/api/core/v1"
3334
apierrs "k8s.io/apimachinery/pkg/api/errors"
35+
"k8s.io/apimachinery/pkg/api/meta"
3436
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
3537
"k8s.io/utils/strings/slices"
3638
"sigs.k8s.io/controller-runtime/pkg/client"
@@ -763,12 +765,53 @@ func (r *ClusterReconciler) updateClusterStatusThatRequiresInstancesState(
763765
}
764766

765767
// we update any relevant cluster status that depends on the primary instance
768+
detectedSystemID := stringset.New()
766769
for _, item := range statuses.Items {
767770
// we refresh the last known timeline on the status root.
768771
// This avoids having a zero timeline ID when no primary instance is up during reconciliation.
769772
if item.IsPrimary && item.TimeLineID != 0 {
770773
cluster.Status.TimelineID = item.TimeLineID
771774
}
775+
if item.SystemID != "" {
776+
detectedSystemID.Put(item.SystemID)
777+
}
778+
}
779+
780+
// we update the system ID field in the cluster status
781+
switch detectedSystemID.Len() {
782+
case 0:
783+
cluster.Status.SystemID = ""
784+
785+
message := "No instances are present in the cluster to report a system ID."
786+
if len(statuses.Items) > 0 {
787+
message = "Instances are present, but none have reported a system ID."
788+
}
789+
790+
meta.SetStatusCondition(&cluster.Status.Conditions, metav1.Condition{
791+
Type: string(apiv1.ConditionConsistentSystemID),
792+
Status: metav1.ConditionFalse,
793+
Reason: "NotFound",
794+
Message: message,
795+
})
796+
797+
case 1:
798+
cluster.Status.SystemID = detectedSystemID.ToList()[0]
799+
meta.SetStatusCondition(&cluster.Status.Conditions, metav1.Condition{
800+
Type: string(apiv1.ConditionConsistentSystemID),
801+
Status: metav1.ConditionTrue,
802+
Reason: "Unique",
803+
Message: "A single, unique system ID was found across reporting instances.",
804+
})
805+
806+
default:
807+
// the instances are reporting an inconsistent system ID
808+
cluster.Status.SystemID = ""
809+
meta.SetStatusCondition(&cluster.Status.Conditions, metav1.Condition{
810+
Type: string(apiv1.ConditionConsistentSystemID),
811+
Status: metav1.ConditionFalse,
812+
Reason: "Mismatch",
813+
Message: fmt.Sprintf("Multiple differing system IDs reported by instances: %q", detectedSystemID.ToSortedList()),
814+
})
772815
}
773816

774817
if !reflect.DeepEqual(existingClusterStatus, cluster.Status) {

internal/controller/cluster_status_test.go

Lines changed: 214 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,11 +24,14 @@ import (
2424

2525
batchv1 "k8s.io/api/batch/v1"
2626
corev1 "k8s.io/api/core/v1"
27+
"k8s.io/apimachinery/pkg/api/meta"
28+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
2729
"k8s.io/apimachinery/pkg/types"
2830
"k8s.io/apimachinery/pkg/util/rand"
2931

3032
v1 "github.com/cloudnative-pg/cloudnative-pg/api/v1"
3133
"github.com/cloudnative-pg/cloudnative-pg/pkg/certs"
34+
"github.com/cloudnative-pg/cloudnative-pg/pkg/postgres"
3235
"github.com/cloudnative-pg/cloudnative-pg/pkg/reconciler/persistentvolumeclaim"
3336

3437
. "github.com/onsi/ginkgo/v2"
@@ -174,3 +177,214 @@ var _ = Describe("cluster_status unit tests", func() {
174177
})
175178
})
176179
})
180+
181+
var _ = Describe("updateClusterStatusThatRequiresInstancesState tests", func() {
182+
var (
183+
env *testingEnvironment
184+
cluster *v1.Cluster
185+
)
186+
187+
BeforeEach(func() {
188+
env = buildTestEnvironment()
189+
cluster = newFakeCNPGCluster(env.client, newFakeNamespace(env.client))
190+
})
191+
192+
It("should handle empty status list", func(ctx SpecContext) {
193+
statuses := postgres.PostgresqlStatusList{}
194+
195+
err := env.clusterReconciler.updateClusterStatusThatRequiresInstancesState(ctx, cluster, statuses)
196+
Expect(err).ToNot(HaveOccurred())
197+
198+
Expect(cluster.Status.InstancesReportedState).To(BeEmpty())
199+
Expect(cluster.Status.SystemID).To(BeEmpty())
200+
201+
condition := meta.FindStatusCondition(cluster.Status.Conditions, string(v1.ConditionConsistentSystemID))
202+
Expect(condition).ToNot(BeNil())
203+
Expect(condition.Status).To(Equal(metav1.ConditionFalse))
204+
Expect(condition.Reason).To(Equal("NotFound"))
205+
Expect(condition.Message).To(Equal("No instances are present in the cluster to report a system ID."))
206+
})
207+
208+
It("should handle instances without SystemID", func(ctx SpecContext) {
209+
statuses := postgres.PostgresqlStatusList{
210+
Items: []postgres.PostgresqlStatus{
211+
{
212+
Pod: &corev1.Pod{
213+
ObjectMeta: metav1.ObjectMeta{Name: "pod-1"},
214+
Status: corev1.PodStatus{PodIP: "192.168.1.1"},
215+
},
216+
IsPrimary: true,
217+
TimeLineID: 123,
218+
SystemID: "",
219+
},
220+
{
221+
Pod: &corev1.Pod{
222+
ObjectMeta: metav1.ObjectMeta{Name: "pod-2"},
223+
Status: corev1.PodStatus{PodIP: "192.168.1.2"},
224+
},
225+
IsPrimary: false,
226+
SystemID: "",
227+
},
228+
},
229+
}
230+
231+
err := env.clusterReconciler.updateClusterStatusThatRequiresInstancesState(ctx, cluster, statuses)
232+
Expect(err).ToNot(HaveOccurred())
233+
234+
Expect(cluster.Status.InstancesReportedState).To(HaveLen(2))
235+
Expect(cluster.Status.TimelineID).To(Equal(123))
236+
Expect(cluster.Status.SystemID).To(BeEmpty())
237+
238+
condition := meta.FindStatusCondition(cluster.Status.Conditions, string(v1.ConditionConsistentSystemID))
239+
Expect(condition).ToNot(BeNil())
240+
Expect(condition.Status).To(Equal(metav1.ConditionFalse))
241+
Expect(condition.Reason).To(Equal("NotFound"))
242+
Expect(condition.Message).To(Equal("Instances are present, but none have reported a system ID."))
243+
})
244+
245+
It("should handle instances with a single SystemID", func(ctx SpecContext) {
246+
const systemID = "system123"
247+
statuses := postgres.PostgresqlStatusList{
248+
Items: []postgres.PostgresqlStatus{
249+
{
250+
Pod: &corev1.Pod{
251+
ObjectMeta: metav1.ObjectMeta{Name: "pod-1"},
252+
Status: corev1.PodStatus{PodIP: "192.168.1.1"},
253+
},
254+
IsPrimary: true,
255+
TimeLineID: 123,
256+
SystemID: systemID,
257+
},
258+
{
259+
Pod: &corev1.Pod{
260+
ObjectMeta: metav1.ObjectMeta{Name: "pod-2"},
261+
Status: corev1.PodStatus{PodIP: "192.168.1.2"},
262+
},
263+
IsPrimary: false,
264+
SystemID: systemID,
265+
},
266+
},
267+
}
268+
269+
err := env.clusterReconciler.updateClusterStatusThatRequiresInstancesState(ctx, cluster, statuses)
270+
Expect(err).ToNot(HaveOccurred())
271+
272+
Expect(cluster.Status.InstancesReportedState).To(HaveLen(2))
273+
Expect(cluster.Status.TimelineID).To(Equal(123))
274+
Expect(cluster.Status.SystemID).To(Equal(systemID))
275+
276+
condition := meta.FindStatusCondition(cluster.Status.Conditions, string(v1.ConditionConsistentSystemID))
277+
Expect(condition).ToNot(BeNil())
278+
Expect(condition.Status).To(Equal(metav1.ConditionTrue))
279+
Expect(condition.Reason).To(Equal("Unique"))
280+
Expect(condition.Message).To(Equal("A single, unique system ID was found across reporting instances."))
281+
})
282+
283+
It("should handle instances with multiple SystemIDs", func(ctx SpecContext) {
284+
statuses := postgres.PostgresqlStatusList{
285+
Items: []postgres.PostgresqlStatus{
286+
{
287+
Pod: &corev1.Pod{
288+
ObjectMeta: metav1.ObjectMeta{Name: "pod-1"},
289+
Status: corev1.PodStatus{PodIP: "192.168.1.1"},
290+
},
291+
IsPrimary: true,
292+
TimeLineID: 123,
293+
SystemID: "system1",
294+
},
295+
{
296+
Pod: &corev1.Pod{
297+
ObjectMeta: metav1.ObjectMeta{Name: "pod-2"},
298+
Status: corev1.PodStatus{PodIP: "192.168.1.2"},
299+
},
300+
IsPrimary: false,
301+
SystemID: "system2",
302+
},
303+
},
304+
}
305+
306+
err := env.clusterReconciler.updateClusterStatusThatRequiresInstancesState(ctx, cluster, statuses)
307+
Expect(err).ToNot(HaveOccurred())
308+
309+
Expect(cluster.Status.InstancesReportedState).To(HaveLen(2))
310+
Expect(cluster.Status.TimelineID).To(Equal(123))
311+
Expect(cluster.Status.SystemID).To(BeEmpty())
312+
313+
condition := meta.FindStatusCondition(cluster.Status.Conditions, string(v1.ConditionConsistentSystemID))
314+
Expect(condition).ToNot(BeNil())
315+
Expect(condition.Status).To(Equal(metav1.ConditionFalse))
316+
Expect(condition.Reason).To(Equal("Mismatch"))
317+
Expect(condition.Message).To(ContainSubstring("Multiple differing system IDs reported by instances:"))
318+
Expect(condition.Message).To(ContainSubstring("system1"))
319+
Expect(condition.Message).To(ContainSubstring("system2"))
320+
})
321+
322+
It("should update timeline ID from the primary instance", func(ctx SpecContext) {
323+
const timelineID = 999
324+
statuses := postgres.PostgresqlStatusList{
325+
Items: []postgres.PostgresqlStatus{
326+
{
327+
Pod: &corev1.Pod{
328+
ObjectMeta: metav1.ObjectMeta{Name: "pod-1"},
329+
Status: corev1.PodStatus{PodIP: "192.168.1.1"},
330+
},
331+
IsPrimary: true,
332+
TimeLineID: timelineID,
333+
SystemID: "system1",
334+
},
335+
{
336+
Pod: &corev1.Pod{
337+
ObjectMeta: metav1.ObjectMeta{Name: "pod-2"},
338+
Status: corev1.PodStatus{PodIP: "192.168.1.2"},
339+
},
340+
IsPrimary: false,
341+
TimeLineID: 123,
342+
SystemID: "system1",
343+
},
344+
},
345+
}
346+
347+
err := env.clusterReconciler.updateClusterStatusThatRequiresInstancesState(ctx, cluster, statuses)
348+
Expect(err).ToNot(HaveOccurred())
349+
350+
Expect(cluster.Status.TimelineID).To(Equal(timelineID))
351+
})
352+
353+
It("should correctly populate InstancesReportedState", func(ctx SpecContext) {
354+
statuses := postgres.PostgresqlStatusList{
355+
Items: []postgres.PostgresqlStatus{
356+
{
357+
Pod: &corev1.Pod{
358+
ObjectMeta: metav1.ObjectMeta{Name: "pod-1"},
359+
Status: corev1.PodStatus{PodIP: "192.168.1.1"},
360+
},
361+
IsPrimary: true,
362+
TimeLineID: 123,
363+
},
364+
{
365+
Pod: &corev1.Pod{
366+
ObjectMeta: metav1.ObjectMeta{Name: "pod-2"},
367+
Status: corev1.PodStatus{PodIP: "192.168.1.2"},
368+
},
369+
IsPrimary: false,
370+
TimeLineID: 123,
371+
},
372+
},
373+
}
374+
375+
err := env.clusterReconciler.updateClusterStatusThatRequiresInstancesState(ctx, cluster, statuses)
376+
Expect(err).ToNot(HaveOccurred())
377+
378+
Expect(cluster.Status.InstancesReportedState).To(HaveLen(2))
379+
380+
state1 := cluster.Status.InstancesReportedState["pod-1"]
381+
Expect(state1.IsPrimary).To(BeTrue())
382+
Expect(state1.TimeLineID).To(Equal(123))
383+
Expect(state1.IP).To(Equal("192.168.1.1"))
384+
385+
state2 := cluster.Status.InstancesReportedState["pod-2"]
386+
Expect(state2.IsPrimary).To(BeFalse())
387+
Expect(state2.TimeLineID).To(Equal(123))
388+
Expect(state2.IP).To(Equal("192.168.1.2"))
389+
})
390+
})

0 commit comments

Comments
 (0)