@@ -90,7 +90,7 @@ func GetAutoBackupPrivValidatorStateCmd() *cobra.Command {
9090 fmt .Println (latestBackupPvs .Json ())
9191
9292 const interval = 800 * time .Millisecond
93- lastExecution := time .Now (). UTC (). Add ( - interval )
93+ var lastExecution time.Time
9494
9595 var createdBackup []string
9696
@@ -115,13 +115,20 @@ func GetAutoBackupPrivValidatorStateCmd() *cobra.Command {
115115
116116 // Load the recent state
117117
118- pvs := & types.PrivateValidatorState {}
119- err := pvs .LoadFromJSONFile (privValStateJsonFilePath )
118+ loadRecentPrivateValidatorState := func () (types.PrivateValidatorState , error ) {
119+ pvs := & types.PrivateValidatorState {}
120+ err := pvs .LoadFromJSONFile (privValStateJsonFilePath )
121+ if err != nil {
122+ return types.PrivateValidatorState {}, err
123+ }
124+ return * pvs , nil
125+ }
126+
127+ recentPvs , err := loadRecentPrivateValidatorState ()
120128 if err != nil {
121129 utils .PrintlnStdErr ("ERR: failed to load priv_validator_state.json file:" , err )
122130 continue
123131 }
124- recentPvs := * pvs
125132
126133 cmp , _ := latestBackupPvs .CompareState (recentPvs )
127134 // TODO handle different signs flag, returned by CompareState
@@ -162,6 +169,40 @@ func GetAutoBackupPrivValidatorStateCmd() *cobra.Command {
162169 continue
163170 }
164171
172+ const slightlySleepDuration = 5 * time .Millisecond // prevent consuming all CPU
173+
174+ if recentPvs .IsEmpty () {
175+ fmt .Println ("WARN: detected state file is empty, possibly restoring snapshot" )
176+ fmt .Println ("WARN: attempts to kill the node binary" , binaryNameToKill , "while waiting content to be restored" )
177+
178+ // a snapshot restore is possibly in progress: keep killing the node until the state file has content again
179+ killedStatusOnSoftProtectRestoreSnapshot := & killedStatus {}
180+
181+ for {
182+ shouldIgnoreSleep := killNodeOnLoop (binaryNameToKill , killedStatusOnSoftProtectRestoreSnapshot )
183+ if shouldIgnoreSleep {
184+ time .Sleep (slightlySleepDuration )
185+ } else {
186+ time .Sleep (100 * time .Millisecond )
187+ }
188+
189+ recentPvs , err = loadRecentPrivateValidatorState ()
190+ if err != nil {
191+ utils .PrintlnStdErr ("ERR: failed to load priv_validator_state.json file after killing node:" , err )
192+ time .Sleep (slightlySleepDuration )
193+ continue
194+ }
195+
196+ if ! recentPvs .IsEmpty () {
197+ // recent state is no longer empty: leave this wait loop so the outer loop re-checks it on its next iteration
198+ break
199+ }
200+ }
201+
202+ lastExecution = time.Time {} // reset last execution time, move to next as fast as possible
203+ continue
204+ }
205+
165206 utils .PrintlnStdErr ("FATAL: priv_validator_state.json content decreased" )
166207 utils .PrintlnStdErr ("Previous state:" )
167208 utils .PrintlnStdErr (latestBackupPvs .Json ())
@@ -223,9 +264,14 @@ How to recover:
223264
224265 // Force-stop the node
225266
226- killedStatus := & killedStatus {}
267+ killedStatusOnFatal := & killedStatus {}
227268 for {
228- killNodeLoop (binaryNameToKill , killedStatus )
269+ shouldIgnoreSleep := killNodeOnLoop (binaryNameToKill , killedStatusOnFatal )
270+ if shouldIgnoreSleep {
271+ time .Sleep (slightlySleepDuration )
272+ } else {
273+ time .Sleep (300 * time .Millisecond )
274+ }
229275 }
230276 }
231277 },
@@ -241,23 +287,14 @@ type killedStatus struct {
241287 killedCount uint
242288}
243289
244- func killNodeLoop (binaryNameToKill string , killedStatus * killedStatus ) {
245- var ignoreSleep bool
246- defer func () {
247- if ignoreSleep {
248- time .Sleep (5 * time .Millisecond ) // prevent CPU race condition
249- } else {
250- time .Sleep (300 * time .Millisecond )
251- }
252- }()
253-
290+ func killNodeOnLoop (binaryNameToKill string , killedStatus * killedStatus ) (shouldIgnoreSleep bool ) {
254291 if killedStatus .killedCount < 1 {
255292 fmt .Println ("INF: Killing the node binary:" , binaryNameToKill )
256293 }
257294 processes , err := process .Processes ()
258295 if err != nil {
259296 utils .PrintlnStdErr ("ERR: failed to get processes:" , err )
260- ignoreSleep = true
297+ shouldIgnoreSleep = true
261298 return
262299 }
263300
@@ -321,7 +358,7 @@ func killNodeLoop(binaryNameToKill string, killedStatus *killedStatus) {
321358 if killedStatus .killedCount < 1 {
322359 utils .PrintlnStdErr ("ERR: no process found to be killed" )
323360 }
324- ignoreSleep = true
361+ shouldIgnoreSleep = true
325362 return
326363 }
327364
@@ -364,6 +401,8 @@ func killNodeLoop(binaryNameToKill string, killedStatus *killedStatus) {
364401 }
365402
366403 fmt .Println ("INF: total killed" , killedStatus .killedCount , "processes" )
404+
405+ return
367406}
368407
369408func createBackupDirIfNotExists (backupDstPath string ) {
0 commit comments