Skip to content

Commit 674c0a7

Browse files
authored
fix crash loop on missing manifest tables (#2198)
Please see https://discuss.hypermode.com/t/manifest-removes-non-existing-table/19882 We are experiencing an error where dgraph tries to remove a table that is missing. I assume that given it has already been removed, it should not trigger a crash loop in compaction. Instead i sense check that the table is indeed removed from all levels, and return without an error code.
1 parent bac276d commit 674c0a7

File tree

7 files changed

+62
-43
lines changed

7 files changed

+62
-43
lines changed

badger/cmd/info.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ import (
1616
"strings"
1717
"time"
1818

19-
humanize "github.com/dustin/go-humanize"
19+
"github.com/dustin/go-humanize"
2020
"github.com/spf13/cobra"
2121

2222
"github.com/dgraph-io/badger/v4"
@@ -109,7 +109,7 @@ func handleInfo(cmd *cobra.Command, args []string) error {
109109
return nil
110110
}
111111

112-
if err := printInfo(sstDir, vlogDir); err != nil {
112+
if err := printInfo(sstDir, vlogDir, bopt); err != nil {
113113
return y.Wrap(err, "failed to print information in MANIFEST file")
114114
}
115115

@@ -320,7 +320,7 @@ func readDir(dir string) ([]fs.FileInfo, error) {
320320
return infos, err
321321
}
322322

323-
func printInfo(dir, valueDir string) error {
323+
func printInfo(dir, valueDir string, bopt badger.Options) error {
324324
if dir == "" {
325325
return fmt.Errorf("--dir not supplied")
326326
}
@@ -336,7 +336,7 @@ func printInfo(dir, valueDir string) error {
336336
fp.Close()
337337
}
338338
}()
339-
manifest, truncOffset, err := badger.ReplayManifestFile(fp, opt.externalMagicVersion)
339+
manifest, truncOffset, err := badger.ReplayManifestFile(fp, opt.externalMagicVersion, bopt)
340340
if err != nil {
341341
return err
342342
}

db2_test.go

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -427,24 +427,24 @@ func TestCompactionFilePicking(t *testing.T) {
427427
for i := 1; i <= 10; i++ {
428428
// Each table has difference of 1 between smallest and largest key.
429429
tab := createTableWithRange(t, db, 2*i-1, 2*i)
430-
addToManifest(t, db, tab, 3)
430+
addToManifest(t, db, tab, 3, db.opt)
431431
require.NoError(t, l3.replaceTables([]*table.Table{}, []*table.Table{tab}))
432432
}
433433

434434
l2 := db.lc.levels[2]
435435
// First table has keys 1 and 4.
436436
tab := createTableWithRange(t, db, 1, 4)
437-
addToManifest(t, db, tab, 2)
437+
addToManifest(t, db, tab, 2, db.opt)
438438
require.NoError(t, l2.replaceTables([]*table.Table{}, []*table.Table{tab}))
439439

440440
// Second table has keys 5 and 12.
441441
tab = createTableWithRange(t, db, 5, 12)
442-
addToManifest(t, db, tab, 2)
442+
addToManifest(t, db, tab, 2, db.opt)
443443
require.NoError(t, l2.replaceTables([]*table.Table{}, []*table.Table{tab}))
444444

445445
// Third table has keys 13 and 18.
446446
tab = createTableWithRange(t, db, 13, 18)
447-
addToManifest(t, db, tab, 2)
447+
addToManifest(t, db, tab, 2, db.opt)
448448
require.NoError(t, l2.replaceTables([]*table.Table{}, []*table.Table{tab}))
449449

450450
cdef := &compactDef{
@@ -479,14 +479,14 @@ func TestCompactionFilePicking(t *testing.T) {
479479
}
480480

481481
// addToManifest function is used in TestCompactionFilePicking. It adds table to db manifest.
482-
func addToManifest(t *testing.T, db *DB, tab *table.Table, level uint32) {
482+
func addToManifest(t *testing.T, db *DB, tab *table.Table, level uint32, opt Options) {
483483
change := &pb.ManifestChange{
484484
Id: tab.ID(),
485485
Op: pb.ManifestChange_CREATE,
486486
Level: level,
487487
Compression: uint32(tab.CompressionType()),
488488
}
489-
require.NoError(t, db.manifest.addChanges([]*pb.ManifestChange{change}),
489+
require.NoError(t, db.manifest.addChanges([]*pb.ManifestChange{change}, opt),
490490
"unable to add to manifest")
491491
}
492492

@@ -679,7 +679,7 @@ func TestWindowsDataLoss(t *testing.T) {
679679
v := []byte("barValuebarValuebarValuebarValuebarValue")
680680
require.Greater(t, len(v), db.valueThreshold())
681681

682-
//32 bytes length and now it's not working
682+
// 32 bytes length and now it's not working
683683
err := txn.Set(key, v)
684684
require.NoError(t, err)
685685
keyList = append(keyList, key)

levels.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -226,7 +226,7 @@ func (s *levelsController) dropTree() (int, error) {
226226
}
227227
}
228228
changeSet := pb.ManifestChangeSet{Changes: changes}
229-
if err := s.kv.manifest.addChanges(changeSet.Changes); err != nil {
229+
if err := s.kv.manifest.addChanges(changeSet.Changes, s.kv.opt); err != nil {
230230
return 0, err
231231
}
232232

@@ -1437,7 +1437,7 @@ func (s *levelsController) runCompactDef(id, l int, cd compactDef) (err error) {
14371437
changeSet := buildChangeSet(&cd, newTables)
14381438

14391439
// We write to the manifest _before_ we delete files (and after we created files)
1440-
if err := s.kv.manifest.addChanges(changeSet.Changes); err != nil {
1440+
if err := s.kv.manifest.addChanges(changeSet.Changes, s.kv.opt); err != nil {
14411441
return err
14421442
}
14431443

@@ -1566,7 +1566,7 @@ func (s *levelsController) addLevel0Table(t *table.Table) error {
15661566
// deletes the table.)
15671567
err := s.kv.manifest.addChanges([]*pb.ManifestChange{
15681568
newCreateChange(t.ID(), 0, t.KeyID(), t.CompressionType()),
1569-
})
1569+
}, s.kv.opt)
15701570
if err != nil {
15711571
return err
15721572
}

levels_test.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ func createAndOpen(db *DB, td []keyValVersion, level int) {
4545
}
4646
if err := db.manifest.addChanges([]*pb.ManifestChange{
4747
newCreateChange(tab.ID(), level, 0, tab.CompressionType()),
48-
}); err != nil {
48+
}, db.opt); err != nil {
4949
panic(err)
5050
}
5151
db.lc.levels[level].Lock()
@@ -1193,7 +1193,7 @@ func TestFillTableCleanup(t *testing.T) {
11931193
tab := buildTable(i)
11941194
require.NoError(t, db.manifest.addChanges([]*pb.ManifestChange{
11951195
newCreateChange(tab.ID(), level, 0, tab.CompressionType()),
1196-
}))
1196+
}, db.opt))
11971197
tab.CreatedAt = time.Now().Add(-10 * time.Hour)
11981198
// Add table to the given level.
11991199
lh.addTable(tab)
@@ -1270,7 +1270,7 @@ func TestStaleDataCleanup(t *testing.T) {
12701270
tab := buildStaleTable(i)
12711271
require.NoError(t, db.manifest.addChanges([]*pb.ManifestChange{
12721272
newCreateChange(tab.ID(), level, 0, tab.CompressionType()),
1273-
}))
1273+
}, db.opt))
12741274
tab.CreatedAt = time.Now().Add(-10 * time.Hour)
12751275
// Add table to the given level.
12761276
lh.addTable(tab)

manifest.go

Lines changed: 21 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -105,10 +105,10 @@ func (m *Manifest) asChanges() []*pb.ManifestChange {
105105
return changes
106106
}
107107

108-
func (m *Manifest) clone() Manifest {
108+
func (m *Manifest) clone(opt Options) Manifest {
109109
changeSet := pb.ManifestChangeSet{Changes: m.asChanges()}
110110
ret := createManifest()
111-
y.Check(applyChangeSet(&ret, &changeSet))
111+
y.Check(applyChangeSet(&ret, &changeSet, opt))
112112
return ret
113113
}
114114

@@ -120,11 +120,11 @@ func openOrCreateManifestFile(opt Options) (
120120
return &manifestFile{inMemory: true}, Manifest{}, nil
121121
}
122122
return helpOpenOrCreateManifestFile(opt.Dir, opt.ReadOnly, opt.ExternalMagicVersion,
123-
manifestDeletionsRewriteThreshold)
123+
manifestDeletionsRewriteThreshold, opt)
124124
}
125125

126126
func helpOpenOrCreateManifestFile(dir string, readOnly bool, extMagic uint16,
127-
deletionsThreshold int) (*manifestFile, Manifest, error) {
127+
deletionsThreshold int, opt Options) (*manifestFile, Manifest, error) {
128128

129129
path := filepath.Join(dir, ManifestFilename)
130130
var flags y.Flags
@@ -149,13 +149,13 @@ func helpOpenOrCreateManifestFile(dir string, readOnly bool, extMagic uint16,
149149
fp: fp,
150150
directory: dir,
151151
externalMagic: extMagic,
152-
manifest: m.clone(),
152+
manifest: m.clone(opt),
153153
deletionsRewriteThreshold: deletionsThreshold,
154154
}
155155
return mf, m, nil
156156
}
157157

158-
manifest, truncOffset, err := ReplayManifestFile(fp, extMagic)
158+
manifest, truncOffset, err := ReplayManifestFile(fp, extMagic, opt)
159159
if err != nil {
160160
_ = fp.Close()
161161
return nil, Manifest{}, err
@@ -177,7 +177,7 @@ func helpOpenOrCreateManifestFile(dir string, readOnly bool, extMagic uint16,
177177
fp: fp,
178178
directory: dir,
179179
externalMagic: extMagic,
180-
manifest: manifest.clone(),
180+
manifest: manifest.clone(opt),
181181
deletionsRewriteThreshold: deletionsThreshold,
182182
}
183183
return mf, manifest, nil
@@ -194,7 +194,7 @@ func (mf *manifestFile) close() error {
194194
// we replay the MANIFEST file, we'll either replay all the changes or none of them. (The truth of
195195
// this depends on the filesystem -- some might append garbage data if a system crash happens at
196196
// the wrong time.)
197-
func (mf *manifestFile) addChanges(changesParam []*pb.ManifestChange) error {
197+
func (mf *manifestFile) addChanges(changesParam []*pb.ManifestChange, opt Options) error {
198198
if mf.inMemory {
199199
return nil
200200
}
@@ -207,7 +207,7 @@ func (mf *manifestFile) addChanges(changesParam []*pb.ManifestChange) error {
207207
// Maybe we could use O_APPEND instead (on certain file systems)
208208
mf.appendLock.Lock()
209209
defer mf.appendLock.Unlock()
210-
if err := applyChangeSet(&mf.manifest, &changes); err != nil {
210+
if err := applyChangeSet(&mf.manifest, &changes, opt); err != nil {
211211
return err
212212
}
213213
// Rewrite manifest if it'd shrink by 1/10 and it's big enough to care
@@ -350,7 +350,7 @@ var (
350350
// Also, returns the last offset after a completely read manifest entry -- the file must be
351351
// truncated at that point before further appends are made (if there is a partial entry after
352352
// that). In normal conditions, truncOffset is the file size.
353-
func ReplayManifestFile(fp *os.File, extMagic uint16) (Manifest, int64, error) {
353+
func ReplayManifestFile(fp *os.File, extMagic uint16, opt Options) (Manifest, int64, error) {
354354
r := countingReader{wrapped: bufio.NewReader(fp)}
355355

356356
var magicBuf [8]byte
@@ -418,15 +418,15 @@ func ReplayManifestFile(fp *os.File, extMagic uint16) (Manifest, int64, error) {
418418
return Manifest{}, 0, err
419419
}
420420

421-
if err := applyChangeSet(&build, &changeSet); err != nil {
421+
if err := applyChangeSet(&build, &changeSet, opt); err != nil {
422422
return Manifest{}, 0, err
423423
}
424424
}
425425

426426
return build, offset, nil
427427
}
428428

429-
func applyManifestChange(build *Manifest, tc *pb.ManifestChange) error {
429+
func applyManifestChange(build *Manifest, tc *pb.ManifestChange, opt Options) error {
430430
switch tc.Op {
431431
case pb.ManifestChange_CREATE:
432432
if _, ok := build.Tables[tc.Id]; ok {
@@ -445,10 +445,14 @@ func applyManifestChange(build *Manifest, tc *pb.ManifestChange) error {
445445
case pb.ManifestChange_DELETE:
446446
tm, ok := build.Tables[tc.Id]
447447
if !ok {
448-
return fmt.Errorf("MANIFEST removes non-existing table %d", tc.Id)
448+
opt.Warningf("MANIFEST delete: table %d has already been removed", tc.Id)
449+
for _, level := range build.Levels {
450+
delete(level.Tables, tc.Id)
451+
}
452+
} else {
453+
delete(build.Levels[tm.Level].Tables, tc.Id)
454+
delete(build.Tables, tc.Id)
449455
}
450-
delete(build.Levels[tm.Level].Tables, tc.Id)
451-
delete(build.Tables, tc.Id)
452456
build.Deletions++
453457
default:
454458
return fmt.Errorf("MANIFEST file has invalid manifestChange op")
@@ -458,9 +462,9 @@ func applyManifestChange(build *Manifest, tc *pb.ManifestChange) error {
458462

459463
// This is not a "recoverable" error -- opening the KV store fails because the MANIFEST file is
460464
// just plain broken.
461-
func applyChangeSet(build *Manifest, changeSet *pb.ManifestChangeSet) error {
465+
func applyChangeSet(build *Manifest, changeSet *pb.ManifestChangeSet, opt Options) error {
462466
for _, change := range changeSet.Changes {
463-
if err := applyManifestChange(build, change); err != nil {
467+
if err := applyManifestChange(build, change, opt); err != nil {
464468
return err
465469
}
466470
}

manifest_test.go

Lines changed: 22 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -196,9 +196,18 @@ func TestOverlappingKeyRangeError(t *testing.T) {
196196
func TestManifestRewrite(t *testing.T) {
197197
dir, err := os.MkdirTemp("", "badger-test")
198198
require.NoError(t, err)
199-
defer removeDir(dir)
199+
200+
db, err := Open(DefaultOptions(dir))
201+
require.NoError(t, err, "error while opening db")
202+
203+
defer func() {
204+
require.NoError(t, db.Close())
205+
removeDir(dir)
206+
}()
207+
200208
deletionsThreshold := 10
201-
mf, m, err := helpOpenOrCreateManifestFile(dir, false, 0, deletionsThreshold)
209+
210+
mf, m, err := helpOpenOrCreateManifestFile(dir, false, 0, deletionsThreshold, db.opt)
202211
defer func() {
203212
if mf != nil {
204213
mf.close()
@@ -210,21 +219,21 @@ func TestManifestRewrite(t *testing.T) {
210219

211220
err = mf.addChanges([]*pb.ManifestChange{
212221
newCreateChange(0, 0, 0, 0),
213-
})
222+
}, db.opt)
214223
require.NoError(t, err)
215224

216225
for i := uint64(0); i < uint64(deletionsThreshold*3); i++ {
217226
ch := []*pb.ManifestChange{
218227
newCreateChange(i+1, 0, 0, 0),
219228
newDeleteChange(i),
220229
}
221-
err := mf.addChanges(ch)
230+
err := mf.addChanges(ch, db.opt)
222231
require.NoError(t, err)
223232
}
224233
err = mf.close()
225234
require.NoError(t, err)
226235
mf = nil
227-
mf, m, err = helpOpenOrCreateManifestFile(dir, false, 0, deletionsThreshold)
236+
mf, m, err = helpOpenOrCreateManifestFile(dir, false, 0, deletionsThreshold, db.opt)
228237
require.NoError(t, err)
229238
require.Equal(t, map[uint64]TableManifest{
230239
uint64(deletionsThreshold * 3): {Level: 0},
@@ -236,14 +245,20 @@ func TestConcurrentManifestCompaction(t *testing.T) {
236245
require.NoError(t, err)
237246
defer removeDir(dir)
238247

248+
db, err := Open(DefaultOptions(dir))
249+
require.NoError(t, err, "error while opening db")
250+
defer func() {
251+
require.NoError(t, db.Close())
252+
}()
253+
239254
// overwrite the sync function to make this race condition easily reproducible
240255
syncFunc = func(f *os.File) error {
241256
// effectively making the Sync() take around 1s makes this reproduce every time
242257
time.Sleep(1 * time.Second)
243258
return f.Sync()
244259
}
245260

246-
mf, _, err := helpOpenOrCreateManifestFile(dir, false, 0, 0)
261+
mf, _, err := helpOpenOrCreateManifestFile(dir, false, 0, 0, db.opt)
247262
require.NoError(t, err)
248263

249264
cs := &pb.ManifestChangeSet{}
@@ -261,7 +276,7 @@ func TestConcurrentManifestCompaction(t *testing.T) {
261276
for i := 0; i < n; i++ {
262277
go func() {
263278
defer wg.Done()
264-
require.NoError(t, mf.addChanges(cs.Changes))
279+
require.NoError(t, mf.addChanges(cs.Changes, db.opt))
265280
}()
266281
}
267282
wg.Wait()

stream_writer.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ import (
1010
"fmt"
1111
"sync"
1212

13-
humanize "github.com/dustin/go-humanize"
13+
"github.com/dustin/go-humanize"
1414
"google.golang.org/protobuf/proto"
1515

1616
"github.com/dgraph-io/badger/v4/pb"
@@ -504,7 +504,7 @@ func (w *sortedWriter) createTable(builder *table.Builder) error {
504504
Level: uint32(lhandler.level),
505505
Compression: uint32(tbl.CompressionType()),
506506
}
507-
if err := w.db.manifest.addChanges([]*pb.ManifestChange{change}); err != nil {
507+
if err := w.db.manifest.addChanges([]*pb.ManifestChange{change}, w.db.opt); err != nil {
508508
return err
509509
}
510510

0 commit comments

Comments
 (0)