@@ -64,6 +64,11 @@ public long ReplicationOffset2
6464 public string PrimaryReplId => currentReplicationConfig . primary_replid ;
6565 public string PrimaryReplId2 => currentReplicationConfig . primary_replid2 ;
6666
67+ /// <summary>
68+ /// Recovery status: assigned by StartRecovery after both the checkpoint pause and the recover write lock succeed, and reset to RecoveryStatus.NoRecovery by SuspendRecovery.
69+ /// </summary>
70+ public RecoveryStatus recoverStatus ;
71+
6772 [ MethodImpl ( MethodImplOptions . AggressiveInlining ) ]
6873 public ReplicationLogCheckpointManager GetCkptManager ( StoreType storeType )
6974 {
@@ -112,7 +117,7 @@ public ReplicationManager(ClusterProvider clusterProvider, ILogger logger = null
112117 clusterProvider . GetReplicationLogCheckpointManager ( StoreType . Object ) . checkpointVersionShift = CheckpointVersionShift ;
113118
114119 // If this node starts as replica, it cannot serve requests until it is connected to primary
115- if ( clusterProvider . clusterManager . CurrentConfig . LocalNodeRole == NodeRole . REPLICA && clusterProvider . serverOptions . Recover && ! StartRecovery ( ) )
120+ if ( clusterProvider . clusterManager . CurrentConfig . LocalNodeRole == NodeRole . REPLICA && clusterProvider . serverOptions . Recover && ! StartRecovery ( RecoveryStatus . InitializeRecover ) )
116121 throw new Exception ( Encoding . ASCII . GetString ( CmdStrings . RESP_ERR_GENERIC_CANNOT_ACQUIRE_RECOVERY_LOCK ) ) ;
117122
118123 checkpointStore = new CheckpointStore ( storeWrapper , clusterProvider , true , logger ) ;
@@ -165,22 +170,24 @@ void CheckpointVersionShift(bool isMainStore, long oldVersion, long newVersion)
165170 /// <summary>
166171 /// Acquire recovery and checkpoint locks to prevent checkpoints and parallel recovery tasks. Checkpoints are paused first, then the recover write lock is taken.
167172 /// </summary> <param name="recoverStatus">Status stored in the recoverStatus field once both locks are held; also included in the log messages.</param> <returns>True when both locks were acquired; false when either acquisition failed.</returns>
168- public bool StartRecovery ( )
173+ public bool StartRecovery ( RecoveryStatus recoverStatus )
169174 {
170175 if ( ! clusterProvider . storeWrapper . TryPauseCheckpoints ( ) )
171176 {
172- logger ? . LogError ( "Error could not acquire checkpoint lock" ) ;
177+ logger ? . LogError ( "Error could not acquire checkpoint lock [{recoverStatus}]" , recoverStatus ) ;
173178 return false ;
174179 }
175180
176181 if ( ! recoverLock . TryWriteLock ( ) )
177182 {
178- logger ? . LogError ( "Error could not acquire recover lock" ) ;
183+ logger ? . LogError ( "Error could not acquire recover lock [{recoverStatus}]" , recoverStatus ) ;
179184 // If failed to acquire recoverLock, re-enable checkpoint taking so the earlier pause is not left in effect
180185 clusterProvider . storeWrapper . ResumeCheckpoints ( ) ;
181186 return false ;
182187 }
183188
189+ this . recoverStatus = recoverStatus ; // parameter shadows the field of the same name, hence the explicit "this."
190+ logger ? . LogTrace ( "Success recover lock [{recoverStatus}]" , recoverStatus ) ;
184191 return true ;
185192 }
186193
@@ -189,6 +196,8 @@ public bool StartRecovery()
189196 /// </summary>
190197 public void SuspendRecovery ( )
191198 {
199+ logger ? . LogTrace ( "Release recover lock [{recoverStatus}]" , recoverStatus ) ; // logged before the reset below so the trace shows the status that was active
200+ recoverStatus = RecoveryStatus . NoRecovery ;
192201 recoverLock . WriteUnlock ( ) ; // releases the write lock taken via TryWriteLock in StartRecovery
193202 clusterProvider . storeWrapper . ResumeCheckpoints ( ) ; // counterpart of the TryPauseCheckpoints call in StartRecovery
194203 }
0 commit comments