Skip to content

Commit 71752a3

Browse files
committed
Expand cleanup strategy for all use cases
It takes into accoount: - cluster status, - node status, - fallback-enabled opt - allow-partly-done opt
1 parent 92df5b1 commit 71752a3

File tree

2 files changed

+94
-32
lines changed

2 files changed

+94
-32
lines changed

pbm/restore/physical.go

Lines changed: 50 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -218,9 +218,9 @@ func peekTmpPort(current int) (int, error) {
218218
}
219219

220220
// close cleans up all temp restore files.
221-
// Based on error status it does cleanup of the node's db path or it
221+
// Based on cluster status it does cleanup of the node's db path or it
222222
// applies fallback sync recovery strategy.
223-
func (r *PhysRestore) close(noerr, cleanup bool) {
223+
func (r *PhysRestore) close(noerr bool, progress nodeStatus) {
224224
if r.tmpConf != nil {
225225
r.log.Debug("rm tmp conf")
226226
err := os.Remove(r.tmpConf.Name())
@@ -249,9 +249,10 @@ func (r *PhysRestore) close(noerr, cleanup bool) {
249249
if err != nil {
250250
r.log.Warning("waiting for cluster status during cleanup: %v", err)
251251
}
252-
if cleanup {
253-
r.resolveCleanupStrategy(cStatus)()
254-
}
252+
253+
// resolve and exec cleanup
254+
cleanup := r.resolveCleanupStrategy(cStatus, progress)
255+
cleanup()
255256

256257
if r.fallback {
257258
// free space by just deleting fallback dir in any other case
@@ -268,7 +269,7 @@ func (r *PhysRestore) close(noerr, cleanup bool) {
268269

269270
// doFullCleanup is node's cleanup strategy for deleting the whole dbpath.
270271
func (r *PhysRestore) doFullCleanup() {
271-
r.log.Debug("clean-up dbpath")
272+
r.log.Warning("apply full cleanup strategy")
272273
err := removeAll(r.dbpath, r.log, getInternalLogFileSkipRule())
273274
if err != nil {
274275
r.log.Error("flush dbpath %s: %v", r.dbpath, err)
@@ -278,41 +279,58 @@ func (r *PhysRestore) doFullCleanup() {
278279
// doFallbackCleanup is node's cleanup strategy for recovering dbpath
279280
// from fallbacksync dir.
280281
func (r *PhysRestore) doFallbackCleanup() {
281-
r.log.Warning("apply db data from %s", fallbackDir)
282+
r.log.Warning("apply fallback cleanup strategy: using db data from %s", fallbackDir)
282283
err := r.migrateFromFallbackDirToDBDir()
283284
if err != nil {
284285
r.log.Error("migrate from fallback dir: %v", err)
285286
}
286287
}
287288

288-
// resolveCleanupStrategy is helper which returns cleanup strategy based on
289-
// config parameters and cluster status.
290-
func (r *PhysRestore) resolveCleanupStrategy(clusterStatus defs.Status) func() {
291-
if !r.fallback {
292-
// fallback strategy is disabled
293-
return r.doFullCleanup
289+
func (r *PhysRestore) skipCleanup() {
290+
r.log.Debug("no cleanup strategy to apply")
291+
}
292+
293+
// resolveCleanupStrategy returns cleanup strategy based on config parameters, cluster status
294+
// and node progress.
295+
func (r *PhysRestore) resolveCleanupStrategy(
296+
clusterStatus defs.Status,
297+
progress nodeStatus,
298+
) func() {
299+
if progress.isDbPathUntouched() {
300+
return r.skipCleanup
294301
}
295302

296-
// fallback strategy is enabled
303+
// dbpath has been changed
297304
switch clusterStatus {
305+
298306
case defs.StatusError:
299-
// restore failed, fallback recovery will be applied
300-
return r.doFallbackCleanup
307+
if r.fallback {
308+
return r.doFallbackCleanup
309+
} else { // fallback is disabled
310+
if progress.isForCleanup() {
311+
return r.doFullCleanup
312+
} else {
313+
// node is in status done in this case
314+
return r.skipCleanup
315+
}
316+
}
317+
301318
case defs.StatusPartlyDone:
302-
if r.allowPartlyDone {
303-
// cluster is partly-done with new restore, and this node will be cleaned
304-
return r.doFullCleanup
305-
} else {
306-
// cluster is partly-done, but anyway fallback recovery will be applied
319+
if r.allowPartlyDone || !r.fallback {
320+
if progress.isForCleanup() {
321+
return r.doFullCleanup
322+
} else {
323+
return r.skipCleanup
324+
}
325+
} else { // partly-done state is not allowed and fallback is enabled
307326
return r.doFallbackCleanup
308327
}
328+
309329
case defs.StatusDone:
310-
// this shouldn't happen, because at least this node failed
311-
// anyway, we'll cleanup this node
312-
return r.doFullCleanup
330+
return r.skipCleanup
313331
}
314332

315-
return r.doFullCleanup
333+
return r.skipCleanup
316334
}
317335

318336
// waitClusterStatus blocks until cluster status file is set on one of final statuses.
@@ -918,6 +936,12 @@ func (n nodeStatus) isForCleanup() bool {
918936
return n&restoreStared != 0 && n&restoreDone == 0
919937
}
920938

939+
// isDbPathUntouched returns true when restore progress
940+
// didn't do any data changes within db path.
941+
func (n nodeStatus) isDbPathUntouched() bool {
942+
return n&restoreStared == 0
943+
}
944+
921945
// isFailed returns true when internal node state didn't reach
922946
// done state, no matter whether it was started or not.
923947
func (n nodeStatus) isFailed() bool {
@@ -1032,7 +1056,7 @@ func (r *PhysRestore) Snapshot(
10321056
r.MarkFailed(err)
10331057
}
10341058

1035-
r.close(err == nil, progress.isForCleanup())
1059+
r.close(err == nil, progress)
10361060
}()
10371061

10381062
err = r.init(ctx, cmd.Name, opid, l)

pbm/restore/physical_test.go

Lines changed: 44 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -231,13 +231,18 @@ func TestResolveCleanupStrategy(t *testing.T) {
231231
const (
232232
fallbackStrategy strategyDesc = "fallback"
233233
fullCleanupStrategy strategyDesc = "fullCleanup"
234+
skipCleanup strategyDesc = "skipCleanup"
235+
236+
nodeUntouched nodeStatus = 0
237+
nodeTouched nodeStatus = 1
234238
)
235239

236240
testCases := []struct {
237241
desc string
238242
fallback bool
239243
allowPartlyDone bool
240244
clusterStatus defs.Status
245+
nodeStatus nodeStatus
241246
wantStrategy strategyDesc
242247
}{
243248
// fallback enabled
@@ -246,43 +251,49 @@ func TestResolveCleanupStrategy(t *testing.T) {
246251
fallback: true,
247252
allowPartlyDone: true,
248253
clusterStatus: defs.StatusError,
254+
nodeStatus: nodeTouched,
249255
wantStrategy: fallbackStrategy,
250256
},
251257
{
252258
desc: "fallback: enabled, allow-partly-done: enabled, status: partly-done",
253259
fallback: true,
254260
allowPartlyDone: true,
255261
clusterStatus: defs.StatusPartlyDone,
262+
nodeStatus: nodeTouched,
256263
wantStrategy: fullCleanupStrategy,
257264
},
258265
{
259266
desc: "fallback: enabled, allow-partly-done: enabled, status: done",
260267
fallback: true,
261268
allowPartlyDone: true,
262269
clusterStatus: defs.StatusDone,
263-
wantStrategy: fullCleanupStrategy,
270+
nodeStatus: nodeTouched,
271+
wantStrategy: skipCleanup,
264272
},
265273

266274
{
267275
desc: "fallback: enabled, allow-partly-done: disabled, status: error",
268276
fallback: true,
269277
allowPartlyDone: false,
270278
clusterStatus: defs.StatusError,
279+
nodeStatus: nodeTouched,
271280
wantStrategy: fallbackStrategy,
272281
},
273282
{
274283
desc: "fallback: enabled, allow-partly-done: disabled, status: partly-done",
275284
fallback: true,
276285
allowPartlyDone: false,
277286
clusterStatus: defs.StatusPartlyDone,
287+
nodeStatus: nodeTouched,
278288
wantStrategy: fallbackStrategy,
279289
},
280290
{
281291
desc: "fallback: enabled, allow-partly-done: disabled, status: done",
282292
fallback: true,
283293
allowPartlyDone: false,
284294
clusterStatus: defs.StatusDone,
285-
wantStrategy: fullCleanupStrategy,
295+
nodeStatus: nodeTouched,
296+
wantStrategy: skipCleanup,
286297
},
287298

288299
// fallback disabled
@@ -291,43 +302,66 @@ func TestResolveCleanupStrategy(t *testing.T) {
291302
fallback: false,
292303
allowPartlyDone: true,
293304
clusterStatus: defs.StatusError,
305+
nodeStatus: nodeTouched,
294306
wantStrategy: fullCleanupStrategy,
295307
},
296308
{
297309
desc: "fallback: disabled, allow-partly-done: enabled, status: partly-done",
298310
fallback: false,
299311
allowPartlyDone: true,
300312
clusterStatus: defs.StatusPartlyDone,
313+
nodeStatus: nodeTouched,
301314
wantStrategy: fullCleanupStrategy,
302315
},
303316
{
304317
desc: "fallback: disabled, allow-partly-done: enabled, status: done",
305318
fallback: false,
306319
allowPartlyDone: true,
307320
clusterStatus: defs.StatusDone,
308-
wantStrategy: fullCleanupStrategy,
321+
nodeStatus: nodeTouched,
322+
wantStrategy: skipCleanup,
309323
},
310-
311324
{
312325
desc: "fallback: disabled, allow-partly-done: disabled, status: error",
313326
fallback: false,
314327
allowPartlyDone: false,
315328
clusterStatus: defs.StatusError,
329+
nodeStatus: nodeTouched,
316330
wantStrategy: fullCleanupStrategy,
317331
},
318332
{
319333
desc: "fallback: disabled, allow-partly-done: disabled, status: partly-done",
320334
fallback: false,
321335
allowPartlyDone: false,
322336
clusterStatus: defs.StatusPartlyDone,
337+
nodeStatus: nodeTouched,
323338
wantStrategy: fullCleanupStrategy,
324339
},
325340
{
326341
desc: "fallback: disabled, allow-partly-done: disabled, status: done",
327342
fallback: false,
328343
allowPartlyDone: false,
329344
clusterStatus: defs.StatusDone,
330-
wantStrategy: fullCleanupStrategy,
345+
nodeStatus: nodeTouched,
346+
wantStrategy: skipCleanup,
347+
},
348+
349+
// db path is untouched
350+
{
351+
desc: "fallback: enabled, allow-partly-done: enabled, status: error",
352+
fallback: true,
353+
allowPartlyDone: true,
354+
clusterStatus: defs.StatusError,
355+
nodeStatus: nodeUntouched,
356+
wantStrategy: skipCleanup,
357+
},
358+
{
359+
desc: "fallback: disabled, allow-partly-done: enabled, status: error",
360+
fallback: false,
361+
allowPartlyDone: true,
362+
clusterStatus: defs.StatusError,
363+
nodeStatus: nodeUntouched,
364+
wantStrategy: skipCleanup,
331365
},
332366
}
333367

@@ -338,7 +372,7 @@ func TestResolveCleanupStrategy(t *testing.T) {
338372
allowPartlyDone: tC.allowPartlyDone,
339373
log: log.DiscardLogger.NewDefaultEvent(),
340374
}
341-
strategy := r.resolveCleanupStrategy(tC.clusterStatus)
375+
strategy := r.resolveCleanupStrategy(tC.clusterStatus, tC.nodeStatus)
342376
if tC.wantStrategy == fullCleanupStrategy {
343377
if cmpStrategy(strategy) != cmpStrategy(r.doFullCleanup) {
344378
t.Fatalf("want=%s", fullCleanupStrategy)
@@ -348,6 +382,10 @@ func TestResolveCleanupStrategy(t *testing.T) {
348382
if cmpStrategy(strategy) != cmpStrategy(r.doFallbackCleanup) {
349383
t.Fatalf("want=%s", fallbackStrategy)
350384
}
385+
} else if tC.wantStrategy == skipCleanup {
386+
if cmpStrategy(strategy) != cmpStrategy(r.skipCleanup) {
387+
t.Fatalf("want=%s", skipCleanup)
388+
}
351389
}
352390
})
353391
}

0 commit comments

Comments
 (0)