Skip to content

Commit 2ed1221

Browse files
committed
feat: last failed backup status field and metric
Signed-off-by: Leonardo Cecchi <[email protected]>
1 parent 33172b6 commit 2ed1221

File tree

6 files changed

+77
-0
lines changed

6 files changed

+77
-0
lines changed

api/v1/objectstore_types.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,9 @@ type RecoveryWindow struct {
7575

7676
// The last successful backup time
7777
LastSuccessfulBackupTime *metav1.Time `json:"lastSuccussfulBackupTime,omitempty"`
78+
79+
// The last failed backup time
80+
LastFailedBackupTime *metav1.Time `json:"lastFailedBackupTime,omitempty"`
7881
}
7982

8083
// +kubebuilder:object:root=true

api/v1/zz_generated.deepcopy.go

Lines changed: 4 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

config/crd/bases/barmancloud.cnpg.io_objectstores.yaml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -609,6 +609,10 @@ spec:
609609
restored.
610610
format: date-time
611611
type: string
612+
lastFailedBackupTime:
613+
description: The last failed backup time
614+
format: date-time
615+
type: string
612616
lastSuccussfulBackupTime:
613617
description: The last successful backup time
614618
format: date-time

internal/cnpgi/instance/backup.go

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ import (
1515
"github.com/cloudnative-pg/machinery/pkg/log"
1616
pgTime "github.com/cloudnative-pg/machinery/pkg/postgres/time"
1717
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
18+
"k8s.io/client-go/util/retry"
1819
"sigs.k8s.io/controller-runtime/pkg/client"
1920

2021
barmancloudv1 "github.com/cloudnative-pg/plugin-barman-cloud/api/v1"
@@ -101,6 +102,13 @@ func (b BackupServiceImplementation) Backup(
101102
postgres.BackupTemporaryDirectory,
102103
); err != nil {
103104
contextLogger.Error(err, "while taking backup")
105+
106+
if failureHandlerError := b.handleBackupError(ctx, configuration); failureHandlerError != nil {
107+
contextLogger.Error(
108+
failureHandlerError,
109+
"Error while handling backup failure, skipping. "+
110+
"BarmanObjectStore object may be not up to date.")
111+
}
104112
return nil, err
105113
}
106114

@@ -166,3 +174,18 @@ func (b BackupServiceImplementation) Backup(
166174
Metadata: newBackupResultMetadata(configuration.Cluster.ObjectMeta.UID, executedBackupInfo.TimeLine).toMap(),
167175
}, nil
168176
}
177+
178+
func (b BackupServiceImplementation) handleBackupError(ctx context.Context, cfg *config.PluginConfiguration) error {
179+
return retry.RetryOnConflict(
180+
retry.DefaultBackoff,
181+
func() error {
182+
return setLastFailedBackupTime(
183+
ctx,
184+
b.Client,
185+
cfg.GetBarmanObjectKey(),
186+
cfg.ServerName,
187+
time.Now(),
188+
)
189+
},
190+
)
191+
}

internal/cnpgi/instance/metrics.go

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ func buildFqName(name string) string {
3131
var (
3232
firstRecoverabilityPointMetricName = buildFqName("first_recoverability_point")
3333
lastAvailableBackupTimestampMetricName = buildFqName("last_available_backup_timestamp")
34+
lastFailedBackupTimestampMetricName = buildFqName("last_failed_backup_timestamp")
3435
)
3536

3637
func (m metricsImpl) GetCapabilities(
@@ -72,6 +73,11 @@ func (m metricsImpl) Define(
7273
Help: "The last available backup as a unix timestamp",
7374
ValueType: &metrics.MetricType{Type: metrics.MetricType_TYPE_GAUGE},
7475
},
76+
{
77+
FqName: lastFailedBackupTimestampMetricName,
78+
Help: "The last failed backup as a unix timestamp",
79+
ValueType: &metrics.MetricType{Type: metrics.MetricType_TYPE_GAUGE},
80+
},
7581
},
7682
}, nil
7783
}
@@ -107,18 +113,28 @@ func (m metricsImpl) Collect(
107113
FqName: lastAvailableBackupTimestampMetricName,
108114
Value: 0,
109115
},
116+
{
117+
FqName: lastFailedBackupTimestampMetricName,
118+
Value: 0,
119+
},
110120
},
111121
}, nil
112122
}
113123

124+
fmt.Println("Arriva ", x)
125+
114126
var firstRecoverabilityPoint float64
115127
var lastAvailableBackup float64
128+
var lastFailedBackup float64
116129
if x.FirstRecoverabilityPoint != nil {
117130
firstRecoverabilityPoint = float64(x.FirstRecoverabilityPoint.Unix())
118131
}
119132
if x.LastSuccessfulBackupTime != nil {
120133
lastAvailableBackup = float64(x.LastSuccessfulBackupTime.Unix())
121134
}
135+
if x.LastFailedBackupTime != nil {
136+
lastFailedBackup = float64(x.LastFailedBackupTime.Unix())
137+
}
122138

123139
return &metrics.CollectMetricsResult{
124140
Metrics: []*metrics.CollectMetric{
@@ -130,6 +146,10 @@ func (m metricsImpl) Collect(
130146
FqName: lastAvailableBackupTimestampMetricName,
131147
Value: lastAvailableBackup,
132148
},
149+
{
150+
FqName: lastFailedBackupTimestampMetricName,
151+
Value: lastFailedBackup,
152+
},
133153
},
134154
}, nil
135155
}

internal/cnpgi/instance/recovery_window.go

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ import (
66

77
"github.com/cloudnative-pg/barman-cloud/pkg/catalog"
88
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
9+
"k8s.io/client-go/util/retry"
910
"k8s.io/utils/ptr"
1011
"sigs.k8s.io/controller-runtime/pkg/client"
1112

@@ -41,3 +42,25 @@ func updateRecoveryWindow(
4142

4243
return c.Status().Update(ctx, objectStore)
4344
}
45+
46+
// setLastFailedBackupTime sets the last failed backup time in the
47+
// passed object store, for the passed server name.
48+
func setLastFailedBackupTime(
49+
ctx context.Context,
50+
c client.Client,
51+
objectStoreKey client.ObjectKey,
52+
serverName string,
53+
lastFailedBackupTime time.Time,
54+
) error {
55+
return retry.RetryOnConflict(retry.DefaultBackoff, func() error {
56+
var objectStore barmancloudv1.ObjectStore
57+
58+
if err := c.Get(ctx, objectStoreKey, &objectStore); err != nil {
59+
return err
60+
}
61+
recoveryWindow := objectStore.Status.ServerRecoveryWindow[serverName]
62+
recoveryWindow.LastFailedBackupTime = ptr.To(metav1.NewTime(lastFailedBackupTime))
63+
objectStore.Status.ServerRecoveryWindow[serverName] = recoveryWindow
64+
return c.Status().Update(ctx, &objectStore)
65+
})
66+
}

0 commit comments

Comments
 (0)