Skip to content

Commit cbc46d1

Browse files
authored
Merge pull request #1477 from smallstep/badger-migration
Add tool to migrate data from badger to mysql or postgresql
2 parents 90bac46 + 1755c8d commit cbc46d1

File tree

2 files changed

+333
-0
lines changed

2 files changed

+333
-0
lines changed

scripts/README.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,3 +2,7 @@
22

33
Please note that `install-step-ra.sh` is referenced on the `files.smallstep.com` S3 website bucket as a redirect to `raw.githubusercontent.com`. If you move it, please update the S3 redirect.
44

5+
## badger-migration
6+
7+
badger-migration is a tool that allows migrating data from BadgerDB (v1 or
8+
v2) to MySQL or PostgreSQL.

scripts/badger-migration/main.go

Lines changed: 329 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,329 @@
1+
package main
2+
3+
import (
4+
"bytes"
5+
"encoding/base64"
6+
"encoding/binary"
7+
"errors"
8+
"flag"
9+
"fmt"
10+
"os"
11+
"path/filepath"
12+
13+
badgerv1 "github.com/dgraph-io/badger"
14+
badgerv2 "github.com/dgraph-io/badger/v2"
15+
16+
"github.com/smallstep/nosql"
17+
)
18+
19+
// authorityTables lists the names of the certificate-authority buckets that
// main copies from badger into the destination database.
var authorityTables = []string{
	"x509_certs",
	"x509_certs_data",
	"revoked_x509_certs",
	"x509_crl",
	"revoked_ssh_certs",
	"used_ott",
	"ssh_certs",
	"ssh_hosts",
	"ssh_users",
	"ssh_host_principals",
}

// acmeTables lists the names of the ACME buckets that main copies from badger
// into the destination database, after the authority tables.
var acmeTables = []string{
	"acme_accounts",
	"acme_keyID_accountID_index",
	"acme_authzs",
	"acme_challenges",
	"nonces",
	"acme_orders",
	"acme_account_orders_index",
	"acme_certs",
	"acme_serial_certs_index",
	"acme_external_account_keys",
	"acme_external_account_keyID_reference_index",
	"acme_external_account_keyID_provisionerID_index",
}
47+
48+
// usage prints the help text for the tool to standard error: a one-line
// description, the command synopsis, a few example invocations and finally
// the defaults of every flag registered on fs.
func usage(fs *flag.FlagSet) {
	out := os.Stderr
	prog := filepath.Base(os.Args[0])

	fmt.Fprintf(out, "%s is a tool to migrate data from BadgerDB to MySQL or PostgreSQL.\n", prog)

	fmt.Fprintln(out, "\nUsage:")
	fmt.Fprintf(out, " %s [-v1|-v2] -dir=<path> [-value-dir=<path>] -type=type -database=<source>\n", prog)

	fmt.Fprintln(out, "\nExamples:")
	// Each example is a format string that takes the program name.
	examples := []string{
		" %s -v1 -dir /var/lib/step-ca/db -type=mysql -database \"user@unix/step_ca\"\n",
		" %s -v2 -dir /var/lib/step-ca/db -type=mysql -database \"user:password@tcp(localhost:3306)/step_ca\"\n",
		" %s -v2 -dir /var/lib/step-ca/db -type=postgresql -database \"user=postgres dbname=step_ca\"\n",
	}
	for _, example := range examples {
		fmt.Fprintf(out, example, prog)
	}

	fmt.Fprintln(out, "\nOptions:")
	fs.PrintDefaults()
}
60+
61+
func main() {
62+
var v1, v2 bool
63+
var dir, valueDir string
64+
var typ, database string
65+
var key string
66+
67+
fs := flag.NewFlagSet(os.Args[0], flag.ExitOnError)
68+
69+
fs.BoolVar(&v1, "v1", false, "use badger v1 as the source database")
70+
fs.BoolVar(&v2, "v2", true, "use badger v2 as the source database")
71+
fs.StringVar(&dir, "dir", "", "badger database directory")
72+
fs.StringVar(&valueDir, "value-dir", "", "badger database value directory")
73+
fs.StringVar(&typ, "type", "", "the destination database type to use")
74+
fs.StringVar(&database, "database", "", "the destination driver-specific data source name")
75+
fs.StringVar(&key, "key", "", "the key used to resume the migration")
76+
fs.Usage = func() { usage(fs) }
77+
fs.Parse(os.Args[1:])
78+
79+
switch {
80+
case v1 == v2:
81+
fatal("flag -v1 or -v2 are required")
82+
case dir == "":
83+
fatal("flag -dir is required")
84+
case typ != "postgresql" && typ != "mysql":
85+
fatal(`flag -type must be "postgresql" or "mysql"`)
86+
case database == "":
87+
fatal("flag --database required")
88+
}
89+
90+
var (
91+
err error
92+
v1DB *badgerv1.DB
93+
v2DB *badgerv2.DB
94+
lastKey []byte
95+
)
96+
97+
if key != "" {
98+
if lastKey, err = base64.StdEncoding.DecodeString(key); err != nil {
99+
fatal("error decoding key: %v", err)
100+
}
101+
}
102+
103+
if v1 {
104+
if v1DB, err = badgerV1Open(dir, valueDir); err != nil {
105+
fatal("error opening badger v1 database: %v", err)
106+
}
107+
} else {
108+
if v2DB, err = badgerV2Open(dir, valueDir); err != nil {
109+
fatal("error opening badger v2 database: %v", err)
110+
}
111+
}
112+
113+
db, err := nosql.New(typ, database)
114+
if err != nil {
115+
fatal("error opening %s database: %v", typ, err)
116+
}
117+
118+
allTables := append([]string{}, authorityTables...)
119+
allTables = append(allTables, acmeTables...)
120+
121+
// Convert prefix names to badger key prefixes
122+
badgerKeys := make([][]byte, len(allTables))
123+
for i, name := range allTables {
124+
badgerKeys[i], err = badgerEncode([]byte(name))
125+
if err != nil {
126+
fatal("error encoding table %s: %v", name, err)
127+
}
128+
}
129+
130+
for i, prefix := range badgerKeys {
131+
table := allTables[i]
132+
133+
// With a key flag, resume from that table and prefix
134+
if lastKey != nil {
135+
bucket, _ := parseBadgerEncode(lastKey)
136+
if table != string(bucket) {
137+
fmt.Printf("skipping table %s\n", table)
138+
continue
139+
}
140+
// Continue with a new prefix
141+
prefix = lastKey
142+
lastKey = nil
143+
}
144+
145+
var n int64
146+
fmt.Printf("migrating %s ...", table)
147+
if err := db.CreateTable([]byte(table)); err != nil {
148+
fatal("error creating table %s: %v", table, err)
149+
}
150+
151+
if v1 {
152+
if badgerKey, err := badgerV1Iterate(v1DB, prefix, func(bucket, key, value []byte) error {
153+
n++
154+
return db.Set(bucket, key, value)
155+
}); err != nil {
156+
fmt.Println()
157+
fatal("error inserting into %s: %v\nLast key: %s", table, err, base64.StdEncoding.EncodeToString(badgerKey))
158+
}
159+
} else {
160+
if badgerKey, err := badgerV2Iterate(v2DB, prefix, func(bucket, key, value []byte) error {
161+
n++
162+
return db.Set(bucket, key, value)
163+
}); err != nil {
164+
fmt.Println()
165+
fatal("error inserting into %s: %v\nLast key: %s", table, err, base64.StdEncoding.EncodeToString(badgerKey))
166+
}
167+
}
168+
169+
fmt.Printf(" %d rows\n", n)
170+
}
171+
}
172+
173+
// fatal formats a message like fmt.Sprintf, writes it followed by a newline to
// standard error and terminates the process with exit status 1.
func fatal(format string, args ...any) {
	stderr := os.Stderr
	fmt.Fprintf(stderr, format, args...)
	fmt.Fprint(stderr, "\n")
	os.Exit(1)
}
178+
179+
func badgerV1Open(dir, valueDir string) (*badgerv1.DB, error) {
180+
opts := badgerv1.DefaultOptions(dir)
181+
if valueDir != "" {
182+
opts.ValueDir = valueDir
183+
}
184+
return badgerv1.Open(opts)
185+
}
186+
187+
func badgerV2Open(dir, valueDir string) (*badgerv2.DB, error) {
188+
opts := badgerv2.DefaultOptions(dir)
189+
if valueDir != "" {
190+
opts.ValueDir = valueDir
191+
}
192+
return badgerv2.Open(opts)
193+
}
194+
195+
// Iterator is the subset of the badger v1 and v2 iterator methods that
// badgerIterate uses to walk a key range.
type Iterator interface {
	// ValidForPrefix is the loop condition used while walking a prefix.
	ValidForPrefix([]byte) bool
	// Seek positions the iterator before the walk starts.
	Seek([]byte)
	// Next advances the iterator to the following entry.
	Next()
}
200+
201+
// Item is the subset of the badger v1 and v2 item methods that badgerIterate
// uses to read an entry.
type Item interface {
	// ValueCopy returns the value of the entry, or an error if it cannot be
	// retrieved.
	ValueCopy([]byte) ([]byte, error)
	// KeyCopy returns the key of the entry.
	KeyCopy([]byte) []byte
}
205+
206+
func badgerV1Iterate(db *badgerv1.DB, prefix []byte, fn func(bucket, key, value []byte) error) (badgerKey []byte, err error) {
207+
err = db.View(func(txn *badgerv1.Txn) error {
208+
it := txn.NewIterator(badgerv1.DefaultIteratorOptions)
209+
defer it.Close()
210+
badgerKey, err = badgerIterate(it, prefix, fn)
211+
return err
212+
})
213+
return
214+
}
215+
216+
func badgerV2Iterate(db *badgerv2.DB, prefix []byte, fn func(bucket, key, value []byte) error) (badgerKey []byte, err error) {
217+
err = db.View(func(txn *badgerv2.Txn) error {
218+
it := txn.NewIterator(badgerv2.DefaultIteratorOptions)
219+
defer it.Close()
220+
badgerKey, err = badgerIterate(it, prefix, fn)
221+
return err
222+
})
223+
return
224+
}
225+
226+
func badgerIterate(it Iterator, prefix []byte, fn func(bucket, key, value []byte) error) ([]byte, error) {
227+
var badgerKey []byte
228+
for it.Seek(prefix); it.ValidForPrefix(prefix); it.Next() {
229+
var item Item
230+
switch itt := it.(type) {
231+
case *badgerv1.Iterator:
232+
item = itt.Item()
233+
case *badgerv2.Iterator:
234+
item = itt.Item()
235+
default:
236+
return badgerKey, fmt.Errorf("unexpected iterator type %T", it)
237+
}
238+
239+
badgerKey = item.KeyCopy(nil)
240+
if isBadgerTable(badgerKey) {
241+
continue
242+
}
243+
244+
bucket, key, err := fromBadgerKey(badgerKey)
245+
if err != nil {
246+
return badgerKey, fmt.Errorf("error converting from badger key %s", badgerKey)
247+
}
248+
value, err := item.ValueCopy(nil)
249+
if err != nil {
250+
return badgerKey, fmt.Errorf("error retrieving contents from database value: %w", err)
251+
}
252+
253+
if err := fn(bucket, key, value); err != nil {
254+
return badgerKey, fmt.Errorf("error exporting %s[%s]=%x", bucket, key, value)
255+
}
256+
}
257+
258+
return badgerKey, nil
259+
}
260+
261+
// badgerEncode returns val prefixed with its length, written as a 2-byte
// little-endian unsigned integer. This is the encoding of one section of a
// badger key.
//
// It returns an error if val is empty or longer than 65535 bytes, the largest
// length that fits in the prefix.
func badgerEncode(val []byte) ([]byte, error) {
	size := len(val)
	if size == 0 {
		return nil, errors.New("input cannot be empty")
	}
	if size > 65535 {
		return nil, errors.New("length of input cannot be greater than 65535")
	}

	var encoded bytes.Buffer
	if err := binary.Write(&encoded, binary.LittleEndian, uint16(size)); err != nil {
		return nil, fmt.Errorf("error doing binary Write: %w", err)
	}
	encoded.Write(val)
	return encoded.Bytes(), nil
}
278+
279+
// parseBadgerEncode decodes the first [length|value] section of bk, where
// length is a 2-byte little-endian unsigned integer, and returns the value
// together with the bytes that follow it.
//
// If bk is shorter than the 2-byte header, or shorter than the header plus the
// announced length, it returns a nil value and bk unchanged as rest. When the
// section spans all of bk, rest is nil.
//
// Offsets are computed with int arithmetic: with uint16 arithmetic the sum of
// the header size and a length of 65534 or 65535 wraps around, and the length
// of inputs longer than 65535 bytes is truncated.
func parseBadgerEncode(bk []byte) (value, rest []byte) {
	const headerLen = 2

	if len(bk) < headerLen {
		return nil, bk
	}

	// First 2 bytes store the length of the value.
	end := headerLen + int(binary.LittleEndian.Uint16(bk))
	switch {
	case len(bk) < end:
		return nil, bk
	case len(bk) == end:
		return bk[headerLen:end], nil
	default:
		return bk[headerLen:end], bk[end:]
	}
}
303+
304+
// isBadgerTable returns True if the slice is a badgerTable token, false
305+
// otherwise. badgerTable means that the slice contains only the [size|value] of
306+
// one section of a badgerKey and no remainder. A badgerKey is [bucket|key],
307+
// while a badgerTable is only the bucket section.
308+
func isBadgerTable(bk []byte) bool {
309+
if k, rest := parseBadgerEncode(bk); len(k) > 0 && len(rest) == 0 {
310+
return true
311+
}
312+
return false
313+
}
314+
315+
// fromBadgerKey returns the bucket and key encoded in a BadgerKey. See
316+
// documentation for toBadgerKey.
317+
func fromBadgerKey(bk []byte) ([]byte, []byte, error) {
318+
bucket, rest := parseBadgerEncode(bk)
319+
if len(bucket) == 0 || len(rest) == 0 {
320+
return nil, nil, fmt.Errorf("invalid badger key: %v", bk)
321+
}
322+
323+
key, rest2 := parseBadgerEncode(rest)
324+
if len(key) == 0 || len(rest2) != 0 {
325+
return nil, nil, fmt.Errorf("invalid badger key: %v", bk)
326+
}
327+
328+
return bucket, key, nil
329+
}

0 commit comments

Comments
 (0)