Skip to content

Commit 9cb34f2

Browse files
committed
backup: updates metric schedule options now also sets updates metric backup option
We currently set two different options on backup that control whether two different metrics are updated: 1. `updates_last_backup_metric` schedule option 2. `updates_cluster_monitoring_metrics` backup option These two should ideally be consolidated into one, but for now we will ensure that setting the former will also set the latter when executing a scheduled backup. Epic: None Release note: None
1 parent e50d793 commit 9cb34f2

File tree

4 files changed

+117
-7
lines changed

4 files changed

+117
-7
lines changed

pkg/ccl/backupccl/BUILD.bazel

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -198,6 +198,7 @@ go_test(
198198
"restore_span_covering_test.go",
199199
"restore_test.go",
200200
"revision_reader_test.go",
201+
"schedule_exec_test.go",
201202
"schedule_pts_chaining_test.go",
202203
"show_test.go",
203204
"system_schema_test.go",
@@ -316,6 +317,7 @@ go_test(
316317
"//pkg/util/log",
317318
"//pkg/util/log/eventpb",
318319
"//pkg/util/log/logpb",
320+
"//pkg/util/metric",
319321
"//pkg/util/mon",
320322
"//pkg/util/protoutil",
321323
"//pkg/util/randutil",

pkg/ccl/backupccl/create_scheduled_backup_test.go

Lines changed: 19 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -143,19 +143,31 @@ func (h *testHelper) clearSchedules(t *testing.T) {
143143
h.sqlDB.Exec(t, "DELETE FROM system.scheduled_jobs WHERE true")
144144
}
145145

146-
func (h *testHelper) waitForSuccessfulScheduledJob(t *testing.T, scheduleID jobspb.ScheduleID) {
147-
query := "SELECT id FROM " + h.env.SystemJobsTableName() +
148-
" WHERE status=$1 AND created_by_type=$2 AND created_by_id=$3"
146+
func (h *testHelper) waitForScheduledJobState(
147+
t *testing.T, scheduleID jobspb.ScheduleID, state jobs.Status,
148+
) {
149+
query := "SELECT status FROM " + h.env.SystemJobsTableName() +
150+
" WHERE created_by_type=$1 AND created_by_id=$2 ORDER BY created DESC LIMIT 1"
149151

150152
testutils.SucceedsSoon(t, func() error {
151-
// Force newly created job to be adopted and verify it succeeds.
152153
h.server.JobRegistry().(*jobs.Registry).TestingNudgeAdoptionQueue()
153-
var unused int64
154-
return h.sqlDB.DB.QueryRowContext(context.Background(),
155-
query, jobs.StatusSucceeded, jobs.CreatedByScheduledJobs, scheduleID).Scan(&unused)
154+
var status string
155+
err := h.sqlDB.DB.QueryRowContext(context.Background(),
156+
query, jobs.CreatedByScheduledJobs, scheduleID).Scan(&status)
157+
if err != nil {
158+
return err
159+
} else if status != string(state) {
160+
return errors.Newf("expected job in %s; found %s", state, status)
161+
}
162+
return nil
156163
})
157164
}
158165

166+
func (h *testHelper) waitForSuccessfulScheduledJob(t *testing.T, scheduleID jobspb.ScheduleID) {
167+
t.Helper()
168+
h.waitForScheduledJobState(t, scheduleID, jobs.StatusSucceeded)
169+
}
170+
159171
func (h *testHelper) waitForSuccessfulScheduledJobCount(
160172
t *testing.T, scheduleID jobspb.ScheduleID, expectedCount int,
161173
) {

pkg/ccl/backupccl/schedule_exec.go

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,21 @@ func (e *scheduledBackupExecutor) executeBackup(
9797
}
9898
backupStmt.AsOf = tree.AsOfClause{Expr: endTime}
9999

100+
// We currently set two different options that control whether two different
101+
// backup metrics are updated:
102+
// 1. updates_last_backup_metric on the schedule
103+
// 2. updates_cluster_monitoring_metrics on the backup statement
104+
// We should probably consolidate these two options int one, but for now we
105+
// will ensure that setting updates_last_backup_metric on the schedule also
106+
// sets updates_cluster_monitoring_metrics on the backup statement.
107+
args := &backuppb.ScheduledBackupExecutionArgs{}
108+
if err := pbtypes.UnmarshalAny(sj.ExecutionArgs().Args, args); err != nil {
109+
return errors.Wrap(err, "un-marshaling args")
110+
}
111+
if args.UpdatesLastBackupMetric {
112+
backupStmt.Options.UpdatesClusterMonitoringMetrics = tree.DBoolTrue
113+
}
114+
100115
// Invoke backup plan hook.
101116
hook, cleanup := cfg.PlanHookMaker(ctx, "exec-backup", txn.KV(), sj.Owner())
102117
defer cleanup()
Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
// Copyright 2025 The Cockroach Authors.
2+
//
3+
// Use of this software is governed by the CockroachDB Software License
4+
// included in the /LICENSE file.
5+
6+
package backupccl
7+
8+
import (
9+
"context"
10+
"testing"
11+
"time"
12+
13+
"github.com/cockroachdb/cockroach/pkg/ccl/backupccl/backuppb"
14+
"github.com/cockroachdb/cockroach/pkg/jobs"
15+
"github.com/cockroachdb/cockroach/pkg/jobs/jobspb"
16+
"github.com/cockroachdb/cockroach/pkg/util/hlc"
17+
"github.com/cockroachdb/cockroach/pkg/util/leaktest"
18+
"github.com/cockroachdb/cockroach/pkg/util/log"
19+
"github.com/cockroachdb/cockroach/pkg/util/metric"
20+
pbtypes "github.com/gogo/protobuf/types"
21+
"github.com/stretchr/testify/require"
22+
)
23+
24+
func TestBackupSucceededUpdatesMetrics(t *testing.T) {
25+
defer leaktest.AfterTest(t)()
26+
ctx := context.Background()
27+
executor := &scheduledBackupExecutor{
28+
metrics: backupMetrics{
29+
RpoMetric: metric.NewGauge(metric.Metadata{}),
30+
},
31+
}
32+
33+
schedule := createSchedule(t, true)
34+
endTime := hlc.Timestamp{WallTime: hlc.UnixNano()}
35+
details := jobspb.BackupDetails{EndTime: endTime}
36+
37+
err := executor.backupSucceeded(ctx, nil, schedule, details, nil)
38+
require.NoError(t, err)
39+
require.Equal(t, endTime.GoTime().Unix(), executor.metrics.RpoMetric.Value())
40+
}
41+
42+
func TestBackupFailedUpdatesMetrics(t *testing.T) {
43+
defer leaktest.AfterTest(t)()
44+
defer log.Scope(t).Close(t)
45+
46+
th, cleanup := newTestHelper(t)
47+
defer cleanup()
48+
49+
th.setOverrideAsOfClauseKnob(t)
50+
schedules, err := th.createBackupSchedule(
51+
t,
52+
`CREATE SCHEDULE FOR
53+
BACKUP INTO 'nodelocal://1/backup' WITH kms = 'aws-kms:///not-a-real-kms-jpeg?AUTH=implicit&REGION=us-east-1'
54+
RECURRING '@hourly' FULL BACKUP ALWAYS
55+
WITH SCHEDULE OPTIONS updates_cluster_last_backup_time_metric`,
56+
)
57+
require.NoError(t, err)
58+
require.Len(t, schedules, 1)
59+
schedule := schedules[0]
60+
61+
th.env.SetTime(schedule.NextRun().Add(1 * time.Second))
62+
require.NoError(t, th.executeSchedules())
63+
th.waitForScheduledJobState(t, schedule.ScheduleID(), jobs.StatusFailed)
64+
65+
metrics, ok := th.server.JobRegistry().(*jobs.Registry).MetricsStruct().Backup.(*BackupMetrics)
66+
require.True(t, ok)
67+
68+
require.Greater(t, metrics.LastKMSInaccessibleErrorTime.Value(), int64(0))
69+
}
70+
71+
func createSchedule(t *testing.T, updatesLastBackupMetric bool) *jobs.ScheduledJob {
72+
schedule := jobs.NewScheduledJob(nil)
73+
74+
args := &backuppb.ScheduledBackupExecutionArgs{
75+
UpdatesLastBackupMetric: updatesLastBackupMetric,
76+
}
77+
any, err := pbtypes.MarshalAny(args)
78+
require.NoError(t, err)
79+
schedule.SetExecutionDetails(schedule.ExecutorType(), jobspb.ExecutionArguments{Args: any})
80+
return schedule
81+
}

0 commit comments

Comments
 (0)