Skip to content

Commit a345149

Browse files
craig[bot]kev-cao
andcommitted
Merge #152475
152475: backup: normalize subdir format and index paths r=msbutler a=kev-cao This commit ensures that when creating a backup job, the `subdir` is always normalized to start with a leading '/' and to not end with a trailing '/'. As a corollary, the backup index is also updated to store the absolute path from the collection URI root for every backup in the same way. Epic: None Release note: None Co-authored-by: Kevin Cao <[email protected]>
2 parents 9e30767 + 55894f5 commit a345149

File tree

8 files changed

+198
-42
lines changed

8 files changed

+198
-42
lines changed

pkg/backup/backup_job.go

Lines changed: 1 addition & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,6 @@ import (
99
"context"
1010
"fmt"
1111
"math"
12-
"net/url"
13-
"path"
1412
"reflect"
1513
"sort"
1614
"strconv"
@@ -842,16 +840,10 @@ func (b *backupResumer) Resume(ctx context.Context, execCtx interface{}) error {
842840
// potentially expensive listing of a giant backup collection to find the most
843841
// recent completed entry.
844842
if backupManifest.StartTime.IsEmpty() && details.CollectionURI != "" {
845-
backupURI, err := url.Parse(details.URI)
843+
suffix, err := backuputils.AbsoluteBackupPathInCollectionURI(details.CollectionURI, details.URI)
846844
if err != nil {
847845
return err
848846
}
849-
collectionURI, err := url.Parse(details.CollectionURI)
850-
if err != nil {
851-
return err
852-
}
853-
854-
suffix := strings.TrimPrefix(path.Clean(backupURI.Path), path.Clean(collectionURI.Path))
855847

856848
c, err := p.ExecCfg().DistSQLSrv.ExternalStorageFromURI(ctx, details.CollectionURI, p.User())
857849
if err != nil {

pkg/backup/backup_planning.go

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ import (
1313

1414
"github.com/cockroachdb/cockroach/pkg/backup/backupbase"
1515
"github.com/cockroachdb/cockroach/pkg/backup/backupresolver"
16+
"github.com/cockroachdb/cockroach/pkg/backup/backuputils"
1617
"github.com/cockroachdb/cockroach/pkg/cloud"
1718
"github.com/cockroachdb/cockroach/pkg/featureflag"
1819
"github.com/cockroachdb/cockroach/pkg/jobs"
@@ -636,7 +637,11 @@ func backupPlanHook(
636637
initialDetails.Destination.Subdir = backupbase.LatestFileName
637638
initialDetails.Destination.Exists = true
638639
} else if subdir != "" {
639-
initialDetails.Destination.Subdir = "/" + strings.TrimPrefix(subdir, "/")
640+
normalizedSubdir, err := backuputils.NormalizeSubdir(subdir)
641+
if err != nil {
642+
return err
643+
}
644+
initialDetails.Destination.Subdir = normalizedSubdir
640645
initialDetails.Destination.Exists = true
641646
} else {
642647
initialDetails.Destination.Subdir = endTime.GoTime().Format(backupbase.DateBasedIntoFolderName)

pkg/backup/backup_test.go

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -726,8 +726,15 @@ func TestBackupAndRestoreJobDescription(t *testing.T) {
726726
"BACKUP INTO LATEST IN $4 WITH incremental_location=($1, $2, $3)",
727727
append(incrementals, collections[0])...)
728728

729-
sqlDB.ExpectErr(t, "No full backup exists in \"/subdir\" to append an incremental backup to. To take a full backup, remove the subdirectory from the backup command",
730-
"BACKUP INTO $4 IN ($1, $2, $3)", append(collections, "subdir")...)
729+
nonExistentFullDir := time.Date(2000, 1, 1, 0, 0, 0, 0, time.UTC).Format(backupbase.DateBasedIntoFolderName)
730+
sqlDB.ExpectErr(
731+
t,
732+
fmt.Sprintf(
733+
`No full backup exists in "%s" to append an incremental backup to. To take a full backup, remove the subdirectory from the backup command`,
734+
nonExistentFullDir,
735+
),
736+
"BACKUP INTO $4 IN ($1, $2, $3)", append(collections, nonExistentFullDir)...,
737+
)
731738

732739
time.Sleep(time.Second + 2)
733740
sqlDB.Exec(t, "BACKUP INTO ($1, $2, $3) AS OF SYSTEM TIME '-1s'", collections...)
@@ -757,7 +764,7 @@ func TestBackupAndRestoreJobDescription(t *testing.T) {
757764
},
758765
)
759766
sqlDB.CheckQueryResults(t, "SELECT description FROM crdb_internal.jobs WHERE job_type = 'BACKUP' AND status = 'failed'",
760-
[][]string{{fmt.Sprintf("BACKUP INTO '%s' IN ('%s', '%s', '%s')", "/subdir", collections[0],
767+
[][]string{{fmt.Sprintf("BACKUP INTO '%s' IN ('%s', '%s', '%s')", nonExistentFullDir, collections[0],
761768
collections[1], collections[2])}})
762769

763770
sqlDB.Exec(t, "DROP DATABASE data CASCADE")

pkg/backup/backupdest/backup_index.go

Lines changed: 1 addition & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -62,22 +62,7 @@ func WriteBackupIndexMetadata(
6262
return errors.AssertionFailedf("incremental backup details missing a start time")
6363
}
6464

65-
var backupCollectionURI string
66-
// Find the root of the collection URI that the backup is being written to so
67-
// that we can determine the relative path of the backup.
68-
if details.StartTime.IsEmpty() {
69-
backupCollectionURI = details.CollectionURI
70-
} else {
71-
var err error
72-
backupCollectionURI, err = ResolveDefaultBaseIncrementalStorageLocation(
73-
details.Destination.To, details.Destination.IncrementalStorage,
74-
)
75-
if err != nil {
76-
return errors.Wrapf(err, "get incremental backup collection URI")
77-
}
78-
}
79-
80-
path, err := backuputils.RelativeBackupPathInCollectionURI(backupCollectionURI, details.URI)
65+
path, err := backuputils.AbsoluteBackupPathInCollectionURI(details.CollectionURI, details.URI)
8166
if err != nil {
8267
return errors.Wrapf(err, "get relative backup path")
8368
}

pkg/backup/backupdest/backup_index_test.go

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -215,11 +215,6 @@ func TestWriteBackupIndexMetadataWithLocalityAwareBackups(t *testing.T) {
215215

216216
require.True(t, fullIndex.StartTime.IsEmpty())
217217
require.False(t, incrIndex.StartTime.IsEmpty())
218-
219-
// Ensure that the incremental path index starts immediately with the full
220-
// index path, implying that its path starts from the root of the incremental
221-
// storage directory.
222-
require.True(t, strings.HasPrefix(incrIndex.Path, fullIndex.Path))
223218
}
224219

225220
func TestWriteBackupindexMetadataWithSpecifiedIncrementalLocation(t *testing.T) {

pkg/backup/backuputils/BUILD.bazel

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,10 @@ go_library(
2121

2222
go_test(
2323
name = "backuputils_test",
24-
srcs = ["memory_backed_quota_pool_test.go"],
24+
srcs = [
25+
"memory_backed_quota_pool_test.go",
26+
"utils_test.go",
27+
],
2528
embed = [":backuputils"],
2629
deps = [
2730
"//pkg/settings/cluster",

pkg/backup/backuputils/utils.go

Lines changed: 49 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -9,11 +9,13 @@ import (
99
"encoding/hex"
1010
"net/url"
1111
"path"
12+
"regexp"
1213
"strings"
1314
"time"
1415

1516
"github.com/cockroachdb/cockroach/pkg/cloud"
1617
"github.com/cockroachdb/cockroach/pkg/util/encoding"
18+
"github.com/cockroachdb/errors"
1719
)
1820

1921
// URLSeparator represents the standard separator used in backup URLs.
@@ -100,16 +102,18 @@ func EncodeDescendingTS(ts time.Time) string {
100102
return hex.EncodeToString(buffer)
101103
}
102104

103-
// RelativeBackupPathInCollectionURI returns the relative path of a backup
104-
// within a collection URI. Backup URI represents the URI that points to the
105-
// directory containing the backup manifest of the backup.
105+
// AbsoluteBackupPathInCollectionURI returns the absolute path of a backup
106+
// assuming the root is the collection URI. Backup URI represents the URI that
107+
// points to the directory containing the backup manifest of the backup. Since
108+
// this is an absolute path, it always starts with `/`. Any trailing slash is
109+
// also removed.
106110
//
107111
// Example:
108112
//
109113
// collectionURI: "nodelocal://1/collection"
110-
// backupURI: "nodelocal://1/collection/backup1/"
111-
// returns: "backup1/"
112-
func RelativeBackupPathInCollectionURI(collectionURI string, backupURI string) (string, error) {
114+
// backupURI: "nodelocal://1/collection/path/to/backup"
115+
// returns: "/path/to/backup"
116+
func AbsoluteBackupPathInCollectionURI(collectionURI string, backupURI string) (string, error) {
113117
backupURL, err := url.Parse(backupURI)
114118
if err != nil {
115119
return "", err
@@ -119,6 +123,44 @@ func RelativeBackupPathInCollectionURI(collectionURI string, backupURI string) (
119123
return "", err
120124
}
121125

122-
relPath := strings.TrimPrefix(path.Clean(backupURL.Path), path.Clean(collectionURL.Path))
126+
if backupURL.Scheme != collectionURL.Scheme || backupURL.Host != collectionURL.Host {
127+
return "", errors.New("backup URI does not share the same scheme and host as collection URI")
128+
}
129+
130+
collectionPath := path.Clean(collectionURL.Path)
131+
if collectionPath == "." {
132+
collectionPath = ""
133+
}
134+
135+
backupPath := path.Clean(backupURL.Path)
136+
if backupPath == "." {
137+
backupPath = ""
138+
}
139+
140+
relPath, found := strings.CutPrefix(backupPath, collectionPath)
141+
if !found {
142+
return "", errors.New("backup URI not contained within collection URI")
143+
}
144+
145+
relPath = strings.TrimSuffix(relPath, string(URLSeparator))
146+
if len(relPath) == 0 || relPath[0] != URLSeparator {
147+
relPath = string(URLSeparator) + relPath
148+
}
123149
return relPath, nil
124150
}
151+
152+
// NormalizeSubdir takes a provided full backup subdirectory and normalizes it
153+
// to the form /YYYY/MM/DD-HHMMSS.SS with a leading slash and no trailing slash.
154+
func NormalizeSubdir(subdir string) (string, error) {
155+
subdirPattern := regexp.MustCompile(`\/?\d{4}\/\d{2}\/\d{2}-\d{6}\.\d{2}\/?`)
156+
if !subdirPattern.Match([]byte(subdir)) {
157+
return "", errors.Newf(
158+
`provided subdir "%s" does not match expected format YYYY/MM/DD-HHMMSS.SS`, subdir,
159+
)
160+
}
161+
normalized := strings.TrimSuffix(subdir, string(URLSeparator))
162+
if normalized[0] != URLSeparator {
163+
normalized = string(URLSeparator) + normalized
164+
}
165+
return normalized, nil
166+
}
Lines changed: 127 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,127 @@
1+
// Copyright 2022 The Cockroach Authors.
2+
//
3+
// Use of this software is governed by the CockroachDB Software License
4+
// included in the /LICENSE file.
5+
6+
package backuputils
7+
8+
import (
9+
"testing"
10+
11+
"github.com/stretchr/testify/require"
12+
)
13+
14+
func TestAbsoluteBackupPathInCollectionURI(t *testing.T) {
15+
testcases := []struct {
16+
name string
17+
collectionURI, backupURI string
18+
expected string
19+
error string
20+
}{
21+
{
22+
name: "collection URI rooted at /",
23+
collectionURI: "nodelocal://1/",
24+
backupURI: "nodelocal://1/foo/bar/baz",
25+
expected: "/foo/bar/baz",
26+
},
27+
{
28+
name: "backup URI contains trailing slash",
29+
collectionURI: "nodelocal://1/",
30+
backupURI: "nodelocal://1/foo/bar/baz/",
31+
expected: "/foo/bar/baz",
32+
},
33+
{
34+
name: "collection URI contains non-empty path",
35+
collectionURI: "s3://backup/foo/bar",
36+
backupURI: "s3://backup/foo/bar/baz",
37+
expected: "/baz",
38+
},
39+
{
40+
name: "collection and backup URI ends in trailing slash",
41+
collectionURI: "external://backup/foo/bar/",
42+
backupURI: "external://backup/foo/bar/baz/qux/",
43+
expected: "/baz/qux",
44+
},
45+
{
46+
name: "backupURI is the same as collection URI",
47+
collectionURI: "gs://backup/foo/bar",
48+
backupURI: "gs://backup/foo/bar",
49+
expected: "/",
50+
},
51+
{
52+
name: "backup URI not in collection URI",
53+
collectionURI: "gs://backup/foo/bar",
54+
backupURI: "gs://backup/baz",
55+
error: "backup URI not contained within collection URI",
56+
},
57+
{
58+
name: "backup URI does not share same scheme or host as collection URI",
59+
collectionURI: "nodelocal://1/foo/bar",
60+
backupURI: "gs://backup/foo/bar/baz",
61+
error: "backup URI does not share the same scheme and host as collection URI",
62+
},
63+
}
64+
65+
for _, tc := range testcases {
66+
t.Run(tc.name, func(t *testing.T) {
67+
result, err := AbsoluteBackupPathInCollectionURI(tc.collectionURI, tc.backupURI)
68+
if tc.error != "" {
69+
require.Error(t, err)
70+
require.ErrorContains(t, err, tc.error)
71+
return
72+
}
73+
74+
require.NoError(t, err)
75+
require.Equal(t, tc.expected, result)
76+
})
77+
}
78+
}
79+
80+
func TestNormalizeSubdir(t *testing.T) {
81+
testcases := []struct {
82+
name string
83+
input string
84+
normalized string
85+
error string
86+
}{
87+
{
88+
name: "subdir without leading slash",
89+
input: "2025/08/26-112233.44",
90+
normalized: "/2025/08/26-112233.44",
91+
},
92+
{
93+
name: "subdir with leading and trailing slashes",
94+
input: "/2025/08/26-112233.44/",
95+
normalized: "/2025/08/26-112233.44",
96+
},
97+
{
98+
name: "subdir with only trailing slash",
99+
input: "2025/08/26-112233.44/",
100+
normalized: "/2025/08/26-112233.44",
101+
},
102+
{
103+
name: "subdir already normalized",
104+
input: "/2025/08/26-112233.44",
105+
normalized: "/2025/08/26-112233.44",
106+
},
107+
{
108+
name: "invalid subdir format",
109+
input: "2023-10-05_123456.78",
110+
error: "does not match expected format YYYY/MM/DD-HHMMSS.SS",
111+
},
112+
}
113+
114+
for _, tc := range testcases {
115+
t.Run(tc.name, func(t *testing.T) {
116+
result, err := NormalizeSubdir(tc.input)
117+
if tc.error != "" {
118+
require.Error(t, err)
119+
require.ErrorContains(t, err, tc.error)
120+
return
121+
}
122+
123+
require.NoError(t, err)
124+
require.Equal(t, tc.normalized, result)
125+
})
126+
}
127+
}

0 commit comments

Comments
 (0)