Skip to content

Commit 42f1a30

Browse files
committed
Update pauser logic based on feedback and discussion
1 parent 4d22a34 commit 42f1a30

File tree

4 files changed

+68
-76
lines changed

4 files changed

+68
-76
lines changed

lib/src/main/java/com/scalar/admin/kubernetes/PauseFailedException.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
package com.scalar.admin.kubernetes;
22

3-
public class PauseFailedException extends Exception {
3+
public class PauseFailedException extends PauserException {
44
public PauseFailedException(String message) {
55
super(message);
66
}

lib/src/main/java/com/scalar/admin/kubernetes/Pauser.java

Lines changed: 65 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@
1010
import java.io.IOException;
1111
import java.net.InetSocketAddress;
1212
import java.time.Instant;
13-
import java.util.Objects;
1413
import java.util.concurrent.TimeUnit;
1514
import java.util.stream.Collectors;
1615
import javax.annotation.Nullable;
@@ -42,12 +41,6 @@ public class Pauser {
4241
private final TargetSelector targetSelector;
4342
private Instant startTime;
4443
private Instant endTime;
45-
private PauseFailedException pauseFailedException;
46-
private UnpauseFailedException unpauseFailedException;
47-
private StatusCheckFailedException statusCheckFailedException;
48-
private boolean pauseSuccessful = false;
49-
private boolean unpauseSuccessful = false;
50-
private boolean compareTargetSuccessful = false;
5144

5245
/**
5346
* @param namespace The namespace where the pods are deployed.
@@ -105,16 +98,17 @@ public PausedDuration pause(int pauseDuration, @Nullable Long maxPauseWaitTime)
10598
}
10699

107100
// Run pause operation.
101+
PauseFailedException pauseFailedException = null;
108102
try {
109-
pauseSuccessful = pauseInternal(requestCoordinator, pauseDuration, maxPauseWaitTime);
103+
pauseInternal(requestCoordinator, pauseDuration, maxPauseWaitTime);
110104
} catch (Exception e) {
111105
pauseFailedException = new PauseFailedException("Pause operation failed.", e);
112106
}
113107

114108
// Run unpause operation.
109+
UnpauseFailedException unpauseFailedException = null;
115110
try {
116-
unpauseSuccessful =
117-
unpauseWithRetry(requestCoordinator, MAX_UNPAUSE_RETRY_COUNT, targetBeforePause);
111+
unpauseWithRetry(requestCoordinator, MAX_UNPAUSE_RETRY_COUNT);
118112
} catch (Exception e) {
119113
unpauseFailedException = new UnpauseFailedException("Unpause operation failed.", e);
120114
}
@@ -130,82 +124,82 @@ public PausedDuration pause(int pauseDuration, @Nullable Long maxPauseWaitTime)
130124
e);
131125
}
132126

127+
// Prepare error messages for each process.
128+
String unpauseErrorMessage =
129+
String.format(
130+
"Unpause operation failed. Scalar products might still be in a paused state. You"
131+
+ " must restart related pods by using the `kubectl rollout restart deployment"
132+
+ " %s` command to unpause all pods. ",
133+
targetBeforePause.getDeployment().getMetadata().getName());
134+
String pauseErrorMessage =
135+
"Pause operation failed. You cannot use the backup that was taken during this pause"
136+
+ " duration. You need to retry the pause operation from the beginning to"
137+
+ " take a backup. ";
138+
String statusCheckErrorMessage =
139+
"Status check failed. You cannot use the backup that was taken during this pause"
140+
+ " duration. You need to retry the pause operation from the beginning to"
141+
+ " take a backup. ";
142+
String statusDifferentErrorMessage =
143+
"The target pods were updated during the pause duration. You cannot use the backup that"
144+
+ " was taken during this pause duration. ";
145+
String unpauseFailedButBackupOkErrorMessage =
146+
String.format(
147+
"Note that the pause operations for taking backup succeeded. You can use a backup that"
148+
+ " was taken during this pause duration: Start Time = %s, End Time = %s. ",
149+
startTime, endTime);
150+
133151
// Check if pods and deployment information are the same between before pause and after pause.
152+
boolean compareTargetSuccessful;
134153
try {
135-
compareTargetSuccessful = compareTargetStates(targetBeforePause, targetAfterPause);
154+
compareTargetSuccessful = compareTargetStatus(targetBeforePause, targetAfterPause);
136155
} catch (Exception e) {
137-
statusCheckFailedException = new StatusCheckFailedException("Status check failed.", e);
156+
if (unpauseFailedException == null) {
157+
throw new StatusCheckFailedException(statusCheckErrorMessage, e);
158+
} else {
159+
throw new UnpauseFailedException(unpauseErrorMessage, e);
160+
}
138161
}
139162

140163
// If both the pause operation and status check succeeded, you can use the backup that was taken
141164
// during the pause duration.
142-
boolean backupOk = pauseSuccessful && compareTargetSuccessful;
165+
boolean backupOk = (pauseFailedException == null) && compareTargetSuccessful;
166+
167+
// Create error message if any of the operations failed.
168+
StringBuilder errorMessageBuilder = new StringBuilder();
169+
if (unpauseFailedException != null) {
170+
errorMessageBuilder.append(unpauseErrorMessage);
171+
if (backupOk) {
172+
errorMessageBuilder.append(unpauseFailedButBackupOkErrorMessage);
173+
}
174+
}
175+
if (pauseFailedException != null) {
176+
errorMessageBuilder.append(pauseErrorMessage);
177+
}
178+
if (!compareTargetSuccessful) {
179+
errorMessageBuilder.append(statusDifferentErrorMessage);
180+
}
181+
String errorMessage = errorMessageBuilder.toString();
143182

144183
// Return the final result based on each process.
145-
if (backupOk) { // Backup OK
146-
if (unpauseSuccessful) { // Backup OK and Unpause OK
147-
return new PausedDuration(startTime, endTime);
148-
} else { // Backup OK but Unpause NG
149-
String errorMessage =
150-
String.format(
151-
"Unpause operation failed. Scalar products might still be in a paused state. You"
152-
+ " must restart related pods by using the `kubectl rollout restart deployment"
153-
+ " %s` command to unpause all pods. However, the pause operations for taking"
154-
+ " backup succeeded. You can use a backup that was taken during this pause"
155-
+ " duration: Start Time = %s, End Time = %s.",
156-
Objects.requireNonNull(targetBeforePause.getDeployment().getMetadata()).getName(),
157-
startTime,
158-
endTime);
159-
// Users who directly utilize this library, bypassing our CLI, are responsible for proper
160-
// exception handling. However, this scenario represents a critical issue. Consequently,
161-
// we output the error message here regardless of whether the calling code handles the
162-
// exception.
163-
logger.error(errorMessage);
164-
throw new UnpauseFailedException(errorMessage, unpauseFailedException);
165-
}
166-
} else { // Backup NG
167-
if (unpauseSuccessful) { // Backup NG but Unpause OK
168-
if (!pauseSuccessful) { // Backup NG (Pause operation failed) but Unpause OK
169-
String errorMessage =
170-
String.format(
171-
"Pause operation failed. You cannot use the backup that was taken during this"
172-
+ " pause duration. You need to retry the pause operation from the beginning"
173-
+ " to take a backup.");
174-
throw new PauseFailedException(errorMessage, pauseFailedException);
175-
} else { // Backup NG (Status check failed) but Unpause OK
176-
String errorMessage =
177-
String.format(
178-
"Status check failed. You cannot use the backup that was taken during this pause"
179-
+ " duration. You need to retry the pause operation from the beginning to"
180-
+ " take a backup.");
181-
throw new StatusCheckFailedException(errorMessage, statusCheckFailedException);
182-
}
183-
} else { // Backup NG and Unpause NG
184-
String errorMessage =
185-
String.format(
186-
"Pause and unpause operation failed. Scalar products might still be in a paused"
187-
+ " state. You must restart related pods by using the `kubectl rollout restart"
188-
+ " deployment %s` command to unpause all pods.",
189-
Objects.requireNonNull(targetBeforePause.getDeployment().getMetadata()).getName());
190-
// Users who directly utilize this library, bypassing our CLI, are responsible for proper
191-
// exception handling. However, this scenario represents a critical issue. Consequently,
192-
// we output the error message here regardless of whether the calling code handles the
193-
// exception.
194-
logger.error(errorMessage);
195-
throw new UnpauseFailedException(errorMessage, unpauseFailedException);
196-
}
184+
if (unpauseFailedException != null) { // Unpause issue is the most critical.
185+
throw new UnpauseFailedException(errorMessage, unpauseFailedException);
186+
} else if (pauseFailedException
187+
!= null) { // Pause issue might be caused by configuration error.
188+
throw new PauseFailedException(errorMessage, pauseFailedException);
189+
} else if (!compareTargetSuccessful) { // Status check issue might be caused by temporary issue.
190+
throw new PauseFailedException(errorMessage);
191+
} else { // All operations succeeded.
192+
return new PausedDuration(startTime, endTime);
197193
}
198194
}
199195

200196
@VisibleForTesting
201-
boolean unpauseWithRetry(RequestCoordinator coordinator, int maxRetryCount, TargetSnapshot target)
202-
throws PauserException {
197+
void unpauseWithRetry(RequestCoordinator coordinator, int maxRetryCount) {
203198
int retryCounter = 0;
204-
205199
while (true) {
206200
try {
207201
coordinator.unpause();
208-
return true;
202+
return;
209203
} catch (Exception e) {
210204
if (++retryCounter >= maxRetryCount) {
211205
throw e;
@@ -227,18 +221,16 @@ RequestCoordinator getRequestCoordinator(TargetSnapshot target) {
227221
.collect(Collectors.toList()));
228222
}
229223

230-
private boolean pauseInternal(
224+
void pauseInternal(
231225
RequestCoordinator requestCoordinator, int pauseDuration, @Nullable Long maxPauseWaitTime) {
232226

233227
requestCoordinator.pause(true, maxPauseWaitTime);
234228
startTime = Instant.now();
235229
Uninterruptibles.sleepUninterruptibly(pauseDuration, TimeUnit.MILLISECONDS);
236230
endTime = Instant.now();
237-
238-
return true;
239231
}
240232

241-
private boolean compareTargetStates(TargetSnapshot before, TargetSnapshot after) {
233+
boolean compareTargetStatus(TargetSnapshot before, TargetSnapshot after) {
242234
return before.getStatus().equals(after.getStatus());
243235
}
244236
}

lib/src/main/java/com/scalar/admin/kubernetes/StatusCheckFailedException.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
package com.scalar.admin.kubernetes;
22

3-
public class StatusCheckFailedException extends Exception {
3+
public class StatusCheckFailedException extends PauserException {
44
public StatusCheckFailedException(String message) {
55
super(message);
66
}

lib/src/main/java/com/scalar/admin/kubernetes/UnpauseFailedException.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
package com.scalar.admin.kubernetes;
22

3-
public class UnpauseFailedException extends Exception {
3+
public class UnpauseFailedException extends PauserException {
44
public UnpauseFailedException(String message) {
55
super(message);
66
}

0 commit comments

Comments
 (0)