Skip to content

Commit eae9f24

Browse files
committed
prevent annoying auto-backup behavior when restoring a node snapshot
1 parent fd54534 commit eae9f24

File tree

1 file changed

+57
-18
lines changed

1 file changed

+57
-18
lines changed

cmd/node/auto_backup_priv_validator_state.go

Lines changed: 57 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,7 @@ func GetAutoBackupPrivValidatorStateCmd() *cobra.Command {
9090
fmt.Println(latestBackupPvs.Json())
9191

9292
const interval = 800 * time.Millisecond
93-
lastExecution := time.Now().UTC().Add(-interval)
93+
var lastExecution time.Time
9494

9595
var createdBackup []string
9696

@@ -115,13 +115,20 @@ func GetAutoBackupPrivValidatorStateCmd() *cobra.Command {
115115

116116
// Load the recent state
117117

118-
pvs := &types.PrivateValidatorState{}
119-
err := pvs.LoadFromJSONFile(privValStateJsonFilePath)
118+
loadRecentPrivateValidatorState := func() (types.PrivateValidatorState, error) {
119+
pvs := &types.PrivateValidatorState{}
120+
err := pvs.LoadFromJSONFile(privValStateJsonFilePath)
121+
if err != nil {
122+
return types.PrivateValidatorState{}, err
123+
}
124+
return *pvs, nil
125+
}
126+
127+
recentPvs, err := loadRecentPrivateValidatorState()
120128
if err != nil {
121129
utils.PrintlnStdErr("ERR: failed to load priv_validator_state.json file:", err)
122130
continue
123131
}
124-
recentPvs := *pvs
125132

126133
cmp, _ := latestBackupPvs.CompareState(recentPvs)
127134
// TODO handle different signs flag, returned by CompareState
@@ -162,6 +169,40 @@ func GetAutoBackupPrivValidatorStateCmd() *cobra.Command {
162169
continue
163170
}
164171

172+
const slightlySleepDuration = 5 * time.Millisecond // prevent consuming all CPU
173+
174+
if recentPvs.IsEmpty() {
175+
fmt.Println("WARN: detected state file is empty, possibly restoring snapshot")
176+
fmt.Println("WARN: attempts to kill the node binary", binaryNameToKill, "while waiting content to be restored")
177+
178+
// a snapshot restore is possibly in progress
179+
killedStatusOnSoftProtectRestoreSnapshot := &killedStatus{}
180+
181+
for {
182+
shouldIgnoreSleep := killNodeOnLoop(binaryNameToKill, killedStatusOnSoftProtectRestoreSnapshot)
183+
if shouldIgnoreSleep {
184+
time.Sleep(slightlySleepDuration)
185+
} else {
186+
time.Sleep(100 * time.Millisecond)
187+
}
188+
189+
recentPvs, err = loadRecentPrivateValidatorState()
190+
if err != nil {
191+
utils.PrintlnStdErr("ERR: failed to load priv_validator_state.json file after killing node:", err)
192+
time.Sleep(slightlySleepDuration)
193+
continue
194+
}
195+
196+
if !recentPvs.IsEmpty() {
197+
// recent state no longer empty, continue to check in next loop
198+
break
199+
}
200+
}
201+
202+
lastExecution = time.Time{} // reset last execution time, move to next as fast as possible
203+
continue
204+
}
205+
165206
utils.PrintlnStdErr("FATAL: priv_validator_state.json content decreased")
166207
utils.PrintlnStdErr("Previous state:")
167208
utils.PrintlnStdErr(latestBackupPvs.Json())
@@ -223,9 +264,14 @@ How to recover:
223264

224265
// Force-stop the node
225266

226-
killedStatus := &killedStatus{}
267+
killedStatusOnFatal := &killedStatus{}
227268
for {
228-
killNodeLoop(binaryNameToKill, killedStatus)
269+
shouldIgnoreSleep := killNodeOnLoop(binaryNameToKill, killedStatusOnFatal)
270+
if shouldIgnoreSleep {
271+
time.Sleep(slightlySleepDuration)
272+
} else {
273+
time.Sleep(300 * time.Millisecond)
274+
}
229275
}
230276
}
231277
},
@@ -241,23 +287,14 @@ type killedStatus struct {
241287
killedCount uint
242288
}
243289

244-
func killNodeLoop(binaryNameToKill string, killedStatus *killedStatus) {
245-
var ignoreSleep bool
246-
defer func() {
247-
if ignoreSleep {
248-
time.Sleep(5 * time.Millisecond) // prevent CPU race condition
249-
} else {
250-
time.Sleep(300 * time.Millisecond)
251-
}
252-
}()
253-
290+
func killNodeOnLoop(binaryNameToKill string, killedStatus *killedStatus) (shouldIgnoreSleep bool) {
254291
if killedStatus.killedCount < 1 {
255292
fmt.Println("INF: Killing the node binary:", binaryNameToKill)
256293
}
257294
processes, err := process.Processes()
258295
if err != nil {
259296
utils.PrintlnStdErr("ERR: failed to get processes:", err)
260-
ignoreSleep = true
297+
shouldIgnoreSleep = true
261298
return
262299
}
263300

@@ -321,7 +358,7 @@ func killNodeLoop(binaryNameToKill string, killedStatus *killedStatus) {
321358
if killedStatus.killedCount < 1 {
322359
utils.PrintlnStdErr("ERR: no process found to be killed")
323360
}
324-
ignoreSleep = true
361+
shouldIgnoreSleep = true
325362
return
326363
}
327364

@@ -364,6 +401,8 @@ func killNodeLoop(binaryNameToKill string, killedStatus *killedStatus) {
364401
}
365402

366403
fmt.Println("INF: total killed", killedStatus.killedCount, "processes")
404+
405+
return
367406
}
368407

369408
func createBackupDirIfNotExists(backupDstPath string) {

0 commit comments

Comments
 (0)