Skip to content

Commit d50446e

Browse files
committed
backup: updates metric schedule options now also sets updates metric backup option
We currently set two different options on backup that control whether two different metrics are updated: 1. `updates_last_backup_metric` schedule option 2. `updates_cluster_monitoring_metrics` backup option These two should ideally be consolidated into one, but for now we will ensure that setting the former will also set the latter when executing a scheduled backup. Epic: None Release note: None
1 parent 755c186 commit d50446e

File tree

3 files changed

+56
-5
lines changed

3 files changed

+56
-5
lines changed

pkg/backup/create_scheduled_backup_test.go

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -148,26 +148,31 @@ func (h *testHelper) clearSchedules(t *testing.T) {
148148
h.sqlDB.Exec(t, "DELETE FROM system.scheduled_jobs WHERE true")
149149
}
150150

151-
func (h *testHelper) waitForSuccessfulScheduledJob(t *testing.T, scheduleID jobspb.ScheduleID) {
152-
t.Helper()
151+
func (h *testHelper) waitForScheduledJobState(
152+
t *testing.T, scheduleID jobspb.ScheduleID, state jobs.State,
153+
) {
153154
query := "SELECT status FROM " + h.env.SystemJobsTableName() +
154155
" WHERE created_by_type=$1 AND created_by_id=$2 ORDER BY created DESC LIMIT 1"
155156

156157
testutils.SucceedsSoon(t, func() error {
157-
// Force newly created job to be adopted and verify it succeeds.
158158
h.server.JobRegistry().(*jobs.Registry).TestingNudgeAdoptionQueue()
159159
var status string
160160
err := h.sqlDB.DB.QueryRowContext(context.Background(),
161161
query, jobs.CreatedByScheduledJobs, scheduleID).Scan(&status)
162162
if err != nil {
163163
return err
164-
} else if status != string(jobs.StateSucceeded) {
165-
return errors.Newf("expected job to succeed; found %s", status)
164+
} else if status != string(state) {
165+
return errors.Newf("expected job in %s; found %s", state, status)
166166
}
167167
return nil
168168
})
169169
}
170170

171+
func (h *testHelper) waitForSuccessfulScheduledJob(t *testing.T, scheduleID jobspb.ScheduleID) {
172+
t.Helper()
173+
h.waitForScheduledJobState(t, scheduleID, jobs.StateSucceeded)
174+
}
175+
171176
func (h *testHelper) waitForSuccessfulScheduledJobCount(
172177
t *testing.T, scheduleID jobspb.ScheduleID, expectedCount int,
173178
) {

pkg/backup/schedule_exec.go

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,21 @@ func (e *scheduledBackupExecutor) executeBackup(
9898
}
9999
backupStmt.AsOf = tree.AsOfClause{Expr: endTime}
100100

101+
// We currently set two different options that control whether two different
102+
// backup metrics are updated:
103+
// 1. updates_last_backup_metric on the schedule
104+
// 2. updates_cluster_monitoring_metrics on the backup statement
105+
// We should probably consolidate these two options int one, but for now we
106+
// will ensure that setting updates_last_backup_metric on the schedule also
107+
// sets updates_cluster_monitoring_metrics on the backup statement.
108+
args := &backuppb.ScheduledBackupExecutionArgs{}
109+
if err := pbtypes.UnmarshalAny(sj.ExecutionArgs().Args, args); err != nil {
110+
return errors.Wrap(err, "un-marshaling args")
111+
}
112+
if args.UpdatesLastBackupMetric {
113+
backupStmt.Options.UpdatesClusterMonitoringMetrics = tree.DBoolTrue
114+
}
115+
101116
// Invoke backup plan hook.
102117
hook, cleanup := cfg.PlanHookMaker(ctx, "exec-backup", txn.KV(), sj.Owner())
103118
defer cleanup()

pkg/backup/schedule_exec_test.go

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,13 +8,15 @@ package backup
88
import (
99
"context"
1010
"testing"
11+
"time"
1112

1213
"github.com/cockroachdb/cockroach/pkg/backup/backuppb"
1314
"github.com/cockroachdb/cockroach/pkg/jobs"
1415
"github.com/cockroachdb/cockroach/pkg/jobs/jobspb"
1516
"github.com/cockroachdb/cockroach/pkg/roachpb"
1617
"github.com/cockroachdb/cockroach/pkg/util/hlc"
1718
"github.com/cockroachdb/cockroach/pkg/util/leaktest"
19+
"github.com/cockroachdb/cockroach/pkg/util/log"
1820
"github.com/cockroachdb/cockroach/pkg/util/metric"
1921
pbtypes "github.com/gogo/protobuf/types"
2022
"github.com/stretchr/testify/require"
@@ -58,6 +60,35 @@ func TestBackupSucceededUpdatesMetrics(t *testing.T) {
5860
})
5961
}
6062

63+
func TestBackupFailedUpdatesMetrics(t *testing.T) {
64+
defer leaktest.AfterTest(t)()
65+
defer log.Scope(t).Close(t)
66+
67+
th, cleanup := newTestHelper(t)
68+
defer cleanup()
69+
70+
th.setOverrideAsOfClauseKnob(t)
71+
schedules, err := th.createBackupSchedule(
72+
t,
73+
`CREATE SCHEDULE FOR
74+
BACKUP INTO 'nodelocal://1/backup' WITH kms = 'aws-kms:///not-a-real-kms-jpeg?AUTH=implicit&REGION=us-east-1'
75+
RECURRING '@hourly' FULL BACKUP ALWAYS
76+
WITH SCHEDULE OPTIONS updates_cluster_last_backup_time_metric`,
77+
)
78+
require.NoError(t, err)
79+
require.Len(t, schedules, 1)
80+
schedule := schedules[0]
81+
82+
th.env.SetTime(schedule.NextRun().Add(1 * time.Second))
83+
require.NoError(t, th.executeSchedules())
84+
th.waitForScheduledJobState(t, schedule.ScheduleID(), jobs.StateFailed)
85+
86+
metrics, ok := th.server.JobRegistry().(*jobs.Registry).MetricsStruct().Backup.(*BackupMetrics)
87+
require.True(t, ok)
88+
89+
require.Greater(t, metrics.LastKMSInaccessibleErrorTime.Value(), int64(0))
90+
}
91+
6192
func createSchedule(t *testing.T, updatesLastBackupMetric bool) *jobs.ScheduledJob {
6293
schedule := jobs.NewScheduledJob(nil)
6394

0 commit comments

Comments
 (0)