Skip to content

Commit 551a3cd

Browse files
feat: last failed backup status field and metric (#467)
Signed-off-by: Leonardo Cecchi <[email protected]> Signed-off-by: Gabriele Bartolini <[email protected]> Co-authored-by: Gabriele Bartolini <[email protected]>
1 parent 32a5539 commit 551a3cd

File tree

7 files changed

+79
-4
lines changed

7 files changed

+79
-4
lines changed

api/v1/objectstore_types.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,9 @@ type RecoveryWindow struct {
7575

7676
// The last successful backup time
7777
LastSuccessfulBackupTime *metav1.Time `json:"lastSuccussfulBackupTime,omitempty"`
78+
79+
// The last failed backup time
80+
LastFailedBackupTime *metav1.Time `json:"lastFailedBackupTime,omitempty"`
7881
}
7982

8083
// +kubebuilder:object:root=true

api/v1/zz_generated.deepcopy.go

Lines changed: 4 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

config/crd/bases/barmancloud.cnpg.io_objectstores.yaml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -609,6 +609,10 @@ spec:
609609
restored.
610610
format: date-time
611611
type: string
612+
lastFailedBackupTime:
613+
description: The last failed backup time
614+
format: date-time
615+
type: string
612616
lastSuccussfulBackupTime:
613617
description: The last successful backup time
614618
format: date-time

internal/cnpgi/instance/backup.go

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ import (
1515
"github.com/cloudnative-pg/machinery/pkg/log"
1616
pgTime "github.com/cloudnative-pg/machinery/pkg/postgres/time"
1717
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
18+
"k8s.io/client-go/util/retry"
1819
"sigs.k8s.io/controller-runtime/pkg/client"
1920

2021
barmancloudv1 "github.com/cloudnative-pg/plugin-barman-cloud/api/v1"
@@ -101,6 +102,13 @@ func (b BackupServiceImplementation) Backup(
101102
postgres.BackupTemporaryDirectory,
102103
); err != nil {
103104
contextLogger.Error(err, "while taking backup")
105+
106+
if failureHandlerError := b.handleBackupError(ctx, configuration); failureHandlerError != nil {
107+
contextLogger.Error(
108+
failureHandlerError,
109+
"Error while handling backup failure, skipping. "+
110+
"BarmanObjectStore object may be not up to date.")
111+
}
104112
return nil, err
105113
}
106114

@@ -166,3 +174,18 @@ func (b BackupServiceImplementation) Backup(
166174
Metadata: newBackupResultMetadata(configuration.Cluster.ObjectMeta.UID, executedBackupInfo.TimeLine).toMap(),
167175
}, nil
168176
}
177+
178+
func (b BackupServiceImplementation) handleBackupError(ctx context.Context, cfg *config.PluginConfiguration) error {
179+
return retry.RetryOnConflict(
180+
retry.DefaultBackoff,
181+
func() error {
182+
return setLastFailedBackupTime(
183+
ctx,
184+
b.Client,
185+
cfg.GetBarmanObjectKey(),
186+
cfg.ServerName,
187+
time.Now(),
188+
)
189+
},
190+
)
191+
}

internal/cnpgi/instance/metrics.go

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ func buildFqName(name string) string {
3131
// Names of the metrics handled in this file, each one built by passing
// the short metric name through buildFqName.
var (
3232
firstRecoverabilityPointMetricName = buildFqName("first_recoverability_point")
3333
lastAvailableBackupTimestampMetricName = buildFqName("last_available_backup_timestamp")
34+
lastFailedBackupTimestampMetricName = buildFqName("last_failed_backup_timestamp")
3435
)
3536

3637
func (m metricsImpl) GetCapabilities(
@@ -72,6 +73,11 @@ func (m metricsImpl) Define(
7273
Help: "The last available backup as a unix timestamp",
7374
ValueType: &metrics.MetricType{Type: metrics.MetricType_TYPE_GAUGE},
7475
},
76+
{
77+
FqName: lastFailedBackupTimestampMetricName,
78+
Help: "The last failed backup as a unix timestamp",
79+
ValueType: &metrics.MetricType{Type: metrics.MetricType_TYPE_GAUGE},
80+
},
7581
},
7682
}, nil
7783
}
@@ -107,18 +113,26 @@ func (m metricsImpl) Collect(
107113
FqName: lastAvailableBackupTimestampMetricName,
108114
Value: 0,
109115
},
116+
{
117+
FqName: lastFailedBackupTimestampMetricName,
118+
Value: 0,
119+
},
110120
},
111121
}, nil
112122
}
113123

114124
var firstRecoverabilityPoint float64
115125
var lastAvailableBackup float64
126+
var lastFailedBackup float64
116127
if x.FirstRecoverabilityPoint != nil {
117128
firstRecoverabilityPoint = float64(x.FirstRecoverabilityPoint.Unix())
118129
}
119130
if x.LastSuccessfulBackupTime != nil {
120131
lastAvailableBackup = float64(x.LastSuccessfulBackupTime.Unix())
121132
}
133+
if x.LastFailedBackupTime != nil {
134+
lastFailedBackup = float64(x.LastFailedBackupTime.Unix())
135+
}
122136

123137
return &metrics.CollectMetricsResult{
124138
Metrics: []*metrics.CollectMetric{
@@ -130,6 +144,10 @@ func (m metricsImpl) Collect(
130144
FqName: lastAvailableBackupTimestampMetricName,
131145
Value: lastAvailableBackup,
132146
},
147+
{
148+
FqName: lastFailedBackupTimestampMetricName,
149+
Value: lastFailedBackup,
150+
},
133151
},
134152
}, nil
135153
}

internal/cnpgi/instance/recovery_window.go

Lines changed: 26 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ import (
66

77
"github.com/cloudnative-pg/barman-cloud/pkg/catalog"
88
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
9+
"k8s.io/client-go/util/retry"
910
"k8s.io/utils/ptr"
1011
"sigs.k8s.io/controller-runtime/pkg/client"
1112

@@ -29,10 +30,9 @@ func updateRecoveryWindow(
2930
return ptr.To(metav1.NewTime(*t))
3031
}
3132

32-
recoveryWindow := barmancloudv1.RecoveryWindow{
33-
FirstRecoverabilityPoint: convertTime(backupList.GetFirstRecoverabilityPoint()),
34-
LastSuccessfulBackupTime: convertTime(backupList.GetLastSuccessfulBackupTime()),
35-
}
33+
recoveryWindow := objectStore.Status.ServerRecoveryWindow[serverName]
34+
recoveryWindow.FirstRecoverabilityPoint = convertTime(backupList.GetFirstRecoverabilityPoint())
35+
recoveryWindow.LastSuccessfulBackupTime = convertTime(backupList.GetLastSuccessfulBackupTime())
3636

3737
if objectStore.Status.ServerRecoveryWindow == nil {
3838
objectStore.Status.ServerRecoveryWindow = make(map[string]barmancloudv1.RecoveryWindow)
@@ -41,3 +41,25 @@ func updateRecoveryWindow(
4141

4242
return c.Status().Update(ctx, objectStore)
4343
}
44+
45+
// setLastFailedBackupTime sets the last failed backup time in the
46+
// passed object store, for the passed server name.
47+
func setLastFailedBackupTime(
48+
ctx context.Context,
49+
c client.Client,
50+
objectStoreKey client.ObjectKey,
51+
serverName string,
52+
lastFailedBackupTime time.Time,
53+
) error {
54+
return retry.RetryOnConflict(retry.DefaultBackoff, func() error {
55+
var objectStore barmancloudv1.ObjectStore
56+
57+
if err := c.Get(ctx, objectStoreKey, &objectStore); err != nil {
58+
return err
59+
}
60+
recoveryWindow := objectStore.Status.ServerRecoveryWindow[serverName]
61+
recoveryWindow.LastFailedBackupTime = ptr.To(metav1.NewTime(lastFailedBackupTime))
62+
objectStore.Status.ServerRecoveryWindow[serverName] = recoveryWindow
63+
return c.Status().Update(ctx, &objectStore)
64+
})
65+
}

web/docs/plugin-barman-cloud.v1.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,5 +101,6 @@ _Appears in:_
101101
| --- | --- | --- | --- | --- |
102102
| `firstRecoverabilityPoint` _[Time](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.32/#time-v1-meta)_ | The first recoverability point in a PostgreSQL server refers to<br />the earliest point in time to which the database can be<br />restored. | True | | |
103103
| `lastSuccussfulBackupTime` _[Time](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.32/#time-v1-meta)_ | The last successful backup time | True | | |
104+
| `lastFailedBackupTime` _[Time](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.32/#time-v1-meta)_ | The last failed backup time | True | | |
104105

105106

0 commit comments

Comments
 (0)