1010import java .io .IOException ;
1111import java .net .InetSocketAddress ;
1212import java .time .Instant ;
13+ import java .util .Objects ;
1314import java .util .concurrent .TimeUnit ;
1415import java .util .stream .Collectors ;
1516import javax .annotation .Nullable ;
@@ -39,6 +40,14 @@ public class Pauser {
3940
4041 private final Logger logger = LoggerFactory .getLogger (Pauser .class );
4142 private final TargetSelector targetSelector ;
43+ private Instant startTime ;
44+ private Instant endTime ;
45+ private PauseFailedException pauseFailedException ;
46+ private UnpauseFailedException unpauseFailedException ;
47+ private StatusCheckFailedException statusCheckFailedException ;
48+ private boolean pauseSuccessful = false ;
49+ private boolean unpauseSuccessful = false ;
50+ private boolean compareTargetSuccessful = false ;
4251
4352 /**
4453 * @param namespace The namespace where the pods are deployed.
@@ -72,55 +81,45 @@ public Pauser(String namespace, String helmReleaseName) throws PauserException {
7281 * @return The start and end time of the pause operation.
7382 */
7483 public PausedDuration pause (int pauseDuration , @ Nullable Long maxPauseWaitTime )
75- throws PauserException {
84+ throws PauserException , UnpauseFailedException , PauseFailedException ,
85+ StatusCheckFailedException {
7686 if (pauseDuration < 1 ) {
7787 throw new IllegalArgumentException (
7888 "pauseDuration is required to be greater than 0 millisecond." );
7989 }
8090
81- TargetSnapshot target ;
91+ // Get pods and deployment information before pause.
92+ TargetSnapshot targetBeforePause ;
8293 try {
83- target = getTarget ();
94+ targetBeforePause = getTarget ();
8495 } catch (Exception e ) {
8596 throw new PauserException ("Failed to find the target pods to pause." , e );
8697 }
8798
88- RequestCoordinator coordinator ;
99+ // Get RequestCoordinator of Scalar Admin to pause.
100+ RequestCoordinator requestCoordinator ;
89101 try {
90- coordinator = getRequestCoordinator (target );
102+ requestCoordinator = getRequestCoordinator (targetBeforePause );
91103 } catch (Exception e ) {
92- throw new PauserException ("Failed to initialize the coordinator." , e );
104+ throw new PauserException ("Failed to initialize the request coordinator." , e );
93105 }
94106
95- Instant startTime ;
96- Instant endTime ;
107+ // Run pause operation.
97108 try {
98- coordinator .pause (true , maxPauseWaitTime );
99-
100- startTime = Instant .now ();
101-
102- Uninterruptibles .sleepUninterruptibly (pauseDuration , TimeUnit .MILLISECONDS );
103-
104- endTime = Instant .now ();
105-
106- unpauseWithRetry (coordinator , MAX_UNPAUSE_RETRY_COUNT , target );
109+ pauseSuccessful = pauseInternal (requestCoordinator , pauseDuration , maxPauseWaitTime );
110+ } catch (Exception e ) {
111+ pauseFailedException = new PauseFailedException ("Pause operation failed." , e );
112+ }
107113
114+ // Run unpause operation.
115+ try {
116+ unpauseSuccessful =
117+ unpauseWithRetry (requestCoordinator , MAX_UNPAUSE_RETRY_COUNT , targetBeforePause );
108118 } catch (Exception e ) {
109- try {
110- unpauseWithRetry (coordinator , MAX_UNPAUSE_RETRY_COUNT , target );
111- } catch (PauserException ex ) {
112- throw new PauserException ("unpauseWithRetry() method failed twice." , e );
113- } catch (Exception ex ) {
114- throw new PauserException (
115- "unpauseWithRetry() method failed twice due to unexpected exception." , e );
116- }
117- throw new PauserException (
118- "The pause operation failed for some reason. However, the unpause operation succeeded"
119- + " afterward. Currently, the scalar products are running with the unpause status."
120- + " You should retry the pause operation to ensure proper backup." ,
121- e );
119+ unpauseFailedException = new UnpauseFailedException ("Unpause operation failed." , e );
122120 }
123121
122+ // Get pods and deployment information after pause.
124123 TargetSnapshot targetAfterPause ;
125124 try {
126125 targetAfterPause = getTarget ();
@@ -131,41 +130,85 @@ public PausedDuration pause(int pauseDuration, @Nullable Long maxPauseWaitTime)
131130 e );
132131 }
133132
134- if (!target .getStatus ().equals (targetAfterPause .getStatus ())) {
135- throw new PauserException ("The target pods were updated during paused. Please retry." );
133+ // Check if pods and deployment information are the same between before pause and after pause.
134+ try {
135+ compareTargetSuccessful = compareTargetStates (targetBeforePause , targetAfterPause );
136+ } catch (Exception e ) {
137+ statusCheckFailedException = new StatusCheckFailedException ("Status check failed." , e );
136138 }
137139
138- return new PausedDuration (startTime , endTime );
140+ // If both the pause operation and status check succeeded, you can use the backup that was taken
141+ // during the pause duration.
142+ boolean backupOk = pauseSuccessful && compareTargetSuccessful ;
143+
144+ // Return the final result based on each process.
145+ if (backupOk ) { // Backup OK
146+ if (unpauseSuccessful ) { // Backup OK and Unpause OK
147+ return new PausedDuration (startTime , endTime );
148+ } else { // Backup OK but Unpause NG
149+ String errorMessage =
150+ String .format (
151+ "Unpause operation failed. Scalar products might still be in a paused state. You"
152+ + " must restart related pods by using the `kubectl rollout restart deployment"
153+ + " %s` command to unpause all pods. However, the pause operations for taking"
154+ + " backup succeeded. You can use a backup that was taken during this pause"
155+ + " duration: Start Time = %s, End Time = %s." ,
156+ Objects .requireNonNull (targetBeforePause .getDeployment ().getMetadata ()).getName (),
157+ startTime ,
158+ endTime );
159+ // Users who directly utilize this library, bypassing our CLI, are responsible for proper
160+ // exception handling. However, this scenario represents a critical issue. Consequently,
161+ // we output the error message here regardless of whether the calling code handles the
162+ // exception.
163+ logger .error (errorMessage );
164+ throw new UnpauseFailedException (errorMessage , unpauseFailedException );
165+ }
166+ } else { // Backup NG
167+ if (unpauseSuccessful ) { // Backup NG but Unpause OK
168+ if (!pauseSuccessful ) { // Backup NG (Pause operation failed) but Unpause OK
169+ String errorMessage =
170+ String .format (
171+ "Pause operation failed. You cannot use the backup that was taken during this"
172+ + " pause duration. You need to retry the pause operation from the beginning"
173+ + " to take a backup." );
174+ throw new PauseFailedException (errorMessage , pauseFailedException );
175+ } else { // Backup NG (Status check failed) but Unpause OK
176+ String errorMessage =
177+ String .format (
178+ "Status check failed. You cannot use the backup that was taken during this pause"
179+ + " duration. You need to retry the pause operation from the beginning to"
180+ + " take a backup." );
181+ throw new StatusCheckFailedException (errorMessage , statusCheckFailedException );
182+ }
183+ } else { // Backup NG and Unpause NG
184+ String errorMessage =
185+ String .format (
186+ "Pause and unpause operation failed. Scalar products might still be in a paused"
187+ + " state. You must restart related pods by using the `kubectl rollout restart"
188+ + " deployment %s` command to unpause all pods." ,
189+ Objects .requireNonNull (targetBeforePause .getDeployment ().getMetadata ()).getName ());
190+ // Users who directly utilize this library, bypassing our CLI, are responsible for proper
191+ // exception handling. However, this scenario represents a critical issue. Consequently,
192+ // we output the error message here regardless of whether the calling code handles the
193+ // exception.
194+ logger .error (errorMessage );
195+ throw new UnpauseFailedException (errorMessage , unpauseFailedException );
196+ }
197+ }
139198 }
140199
141200 @ VisibleForTesting
142- void unpauseWithRetry (RequestCoordinator coordinator , int maxRetryCount , TargetSnapshot target )
201+ boolean unpauseWithRetry (RequestCoordinator coordinator , int maxRetryCount , TargetSnapshot target )
143202 throws PauserException {
144203 int retryCounter = 0 ;
145204
146205 while (true ) {
147206 try {
148207 coordinator .unpause ();
149- return ;
208+ return true ;
150209 } catch (Exception e ) {
151210 if (++retryCounter >= maxRetryCount ) {
152- // Users who directly utilize this library, bypassing our CLI, are responsible for proper
153- // exception handling. However, this scenario represents a critical issue. Consequently,
154- // we output the error message here regardless of whether the calling code handles the
155- // exception.
156- logger .error (
157- "Failed to unpause Scalar product. They are still in paused. You must restart related"
158- + " pods by using the `kubectl rollout restart deployment {}`"
159- + " command to unpause all pods." ,
160- target .getDeployment ().getMetadata ().getName ());
161- // In our CLI, we catch this exception and output the message as an error on the CLI side.
162- throw new PauserException (
163- String .format (
164- "Failed to unpause Scalar product. They are still in paused. You must restart"
165- + " related pods by using the `kubectl rollout restart deployment %s` command"
166- + " to unpause all pods." ,
167- target .getDeployment ().getMetadata ().getName ()),
168- e );
211+ throw e ;
169212 }
170213 }
171214 }
@@ -183,4 +226,19 @@ RequestCoordinator getRequestCoordinator(TargetSnapshot target) {
183226 .map (p -> new InetSocketAddress (p .getStatus ().getPodIP (), target .getAdminPort ()))
184227 .collect (Collectors .toList ()));
185228 }
229+
230+ private boolean pauseInternal (
231+ RequestCoordinator requestCoordinator , int pauseDuration , @ Nullable Long maxPauseWaitTime ) {
232+
233+ requestCoordinator .pause (true , maxPauseWaitTime );
234+ startTime = Instant .now ();
235+ Uninterruptibles .sleepUninterruptibly (pauseDuration , TimeUnit .MILLISECONDS );
236+ endTime = Instant .now ();
237+
238+ return true ;
239+ }
240+
241+ private boolean compareTargetStates (TargetSnapshot before , TargetSnapshot after ) {
242+ return before .getStatus ().equals (after .getStatus ());
243+ }
186244}
0 commit comments