Skip to content

Commit 0430086

Browse files
committed
PBM-1114: save metadata file before backup done
The backup status can be set to done in the database metadata before the metadata file is written to storage. If that write fails, no metadata file will be available on storage, and a subsequent storage resync will delete the backup metadata from the database because it cannot find it on storage.
1 parent 827d30d commit 0430086

File tree

2 files changed

+62
-25
lines changed

2 files changed

+62
-25
lines changed

pbm/backup/backup.go

Lines changed: 39 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -337,18 +337,39 @@ func (b *Backup) Run(ctx context.Context, bcp *ctrl.BackupCmd, opid ctrl.OPID, l
337337
}
338338

339339
if inf.IsLeader() {
340-
err = b.reconcileStatus(ctx, bcp.Name, opid.String(), defs.StatusDone, nil)
340+
shards, err := topo.ClusterMembers(ctx, b.leadConn.MongoClient())
341341
if err != nil {
342-
return errors.Wrap(err, "check cluster for backup done")
342+
return errors.Wrap(err, "check cluster for backup done: get cluster members")
343+
}
344+
345+
err = b.convergeCluster(ctx, bcp.Name, opid.String(), shards, defs.StatusDone)
346+
err = errors.Wrap(err, "check cluster for backup done: convergeCluster")
347+
if err != nil {
348+
return err
343349
}
344350

345351
bcpm, err = NewDBManager(b.leadConn).GetBackupByName(ctx, bcp.Name)
346352
if err != nil {
347353
return errors.Wrap(err, "get backup metadata")
348354
}
349355

356+
// PBM-1114: update file metadata with the same values as in database
357+
unix := time.Now().Unix()
358+
bcpm.Status = defs.StatusDone
359+
bcpm.LastTransitionTS = unix
360+
bcpm.Conditions = append(bcpm.Conditions, Condition{
361+
Timestamp: unix,
362+
Status: defs.StatusDone,
363+
})
364+
350365
err = writeMeta(stg, bcpm)
351-
return errors.Wrap(err, "dump metadata")
366+
if err != nil {
367+
return errors.Wrap(err, "dump metadata")
368+
}
369+
370+
err = ChangeBackupStateWithUnix(b.leadConn, bcp.Name, defs.StatusDone, unix, "")
371+
return errors.Wrapf(err, "check cluster for backup done: update backup meta with %s",
372+
defs.StatusDone)
352373
} else {
353374
// to be sure the locks released only after the "done" status had written
354375
err = b.waitForStatus(ctx, bcp.Name, defs.StatusDone, nil)
@@ -432,14 +453,18 @@ func (b *Backup) reconcileStatus(
432453
}
433454

434455
if timeout != nil {
435-
return errors.Wrap(
436-
b.convergeClusterWithTimeout(ctx, bcpName, opid, shards, status, *timeout),
437-
"convergeClusterWithTimeout")
456+
err = b.convergeClusterWithTimeout(ctx, bcpName, opid, shards, status, *timeout)
457+
err = errors.Wrap(err, "convergeClusterWithTimeout")
458+
} else {
459+
err = b.convergeCluster(ctx, bcpName, opid, shards, status)
460+
err = errors.Wrap(err, "convergeCluster")
461+
}
462+
if err != nil {
463+
return err
438464
}
439465

440-
return errors.Wrap(
441-
b.convergeCluster(ctx, bcpName, opid, shards, status),
442-
"convergeCluster")
466+
err = ChangeBackupState(b.leadConn, bcpName, status, "")
467+
return errors.Wrapf(err, "update backup meta with %s", status)
443468
}
444469

445470
// convergeCluster waits until all given shards reached `status` and updates a cluster status
@@ -480,10 +505,11 @@ func (b *Backup) convergeClusterWithTimeout(
480505
status defs.Status,
481506
t time.Duration,
482507
) error {
483-
tk := time.NewTicker(time.Second * 1)
508+
tk := time.NewTicker(time.Second)
484509
defer tk.Stop()
485510

486-
tout := time.After(t)
511+
tout := time.NewTimer(t)
512+
defer tout.Stop()
487513

488514
for {
489515
select {
@@ -495,7 +521,7 @@ func (b *Backup) convergeClusterWithTimeout(
495521
if ok {
496522
return nil
497523
}
498-
case <-tout:
524+
case <-tout.C:
499525
return errors.Wrap(errConvergeTimeOut, t.String())
500526
case <-ctx.Done():
501527
return ctx.Err()
@@ -554,15 +580,7 @@ func (b *Backup) converged(
554580
}
555581
}
556582

557-
if shardsToFinish == 0 {
558-
err := ChangeBackupState(b.leadConn, bcpName, status, "")
559-
if err != nil {
560-
return false, errors.Wrapf(err, "update backup meta with %s", status)
561-
}
562-
return true, nil
563-
}
564-
565-
return false, nil
583+
return shardsToFinish == 0, nil
566584
}
567585

568586
func (b *Backup) waitForStatus(

pbm/backup/query.go

Lines changed: 23 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -81,15 +81,34 @@ func getBackupMeta(ctx context.Context, conn connect.Client, clause bson.D) (*Ba
8181
}
8282

8383
func ChangeBackupStateOPID(conn connect.Client, opid string, s defs.Status, msg string) error {
84-
return changeBackupState(context.Background(), conn, bson.D{{"opid", opid}}, s, msg)
84+
return changeBackupState(context.TODO(),
85+
conn, bson.D{{"opid", opid}}, time.Now().UTC().Unix(), s, msg)
8586
}
8687

8788
func ChangeBackupState(conn connect.Client, bcpName string, s defs.Status, msg string) error {
88-
return changeBackupState(context.Background(), conn, bson.D{{"name", bcpName}}, s, msg)
89+
return changeBackupState(context.TODO(),
90+
conn, bson.D{{"name", bcpName}}, time.Now().UTC().Unix(), s, msg)
8991
}
9092

91-
func changeBackupState(ctx context.Context, conn connect.Client, clause bson.D, s defs.Status, msg string) error {
92-
ts := time.Now().UTC().Unix()
93+
func ChangeBackupStateWithUnix(
94+
conn connect.Client,
95+
bcpName string,
96+
s defs.Status,
97+
unix int64,
98+
msg string,
99+
) error {
100+
return changeBackupState(context.TODO(),
101+
conn, bson.D{{"name", bcpName}}, time.Now().UTC().Unix(), s, msg)
102+
}
103+
104+
func changeBackupState(
105+
ctx context.Context,
106+
conn connect.Client,
107+
clause bson.D,
108+
ts int64,
109+
s defs.Status,
110+
msg string,
111+
) error {
93112
_, err := conn.BcpCollection().UpdateOne(
94113
ctx,
95114
clause,

0 commit comments

Comments
 (0)